setDescription('36kr.com'); }

    /**
     * Command entry point: runs the news URL pass and reports success.
     *
     * @param InputInterface  $input  Console input (not read here).
     * @param OutputInterface $output Console output (not written here).
     *
     * @return int Always 0.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        // News: /touch/reconstruct/article/list/BBM54PGAwangning/0-10.html
        $this->_start();

        return 0;

        // NOTE(review): everything below is unreachable because of the return
        // above. It is kept so the spider entry point is not lost; drop the
        // early return to re-enable it. `run` is defined outside this view —
        // presumably a coroutine runner; confirm before re-enabling.
        run(function () {
            $this->spiderStart();
        });

        return 0;
    }

    /**
     * Collect the news section.
     *
     * In its current state this only walks every AppNews row with
     * is_delete = 0 and dumps the public URL built from the site domain, the
     * column URL, the second-column URL and the news id. Any failure for a
     * row is caught and its message dumped, so one bad row does not stop the
     * pass.
     *
     * Fixes compared with the previous version:
     *  - `$cache` is now captured by reference. Before, the closure had no
     *    `use` clause, so it saw a fresh, empty `$cache` on every row and the
     *    look-ups were repeated for each news item.
     *  - A missing column / second column now contributes an empty URL
     *    segment. Before, the `''` fallback was indexed with `['url']`, which
     *    throws on PHP 8 and turned every such row into an error message.
     *
     * @return void
     */
    private function _start()
    {
        // Per-run memo of website configs and columns, keyed by their ids.
        $cache = [];

        // NOTE(review): the trailing `1` is passed to each() as its second
        // argument — presumably the chunk size, i.e. one row per underlying
        // query; confirm this is intended.
        AppNews::query()->where('is_delete', 0)->each(function ($item) use (&$cache) {
            try {
                if (!$item->platform) {
                    throw new \Exception('!');
                }

                if (!isset($cache['website'][$item->platform])) {
                    // A missing config still fails here (toArray() on null);
                    // the catch below reports it for this row.
                    $cache['website'][$item->platform] = AppWebsiteConfig::find($item->platform)->toArray();
                }
                // Primary domain
                $domain = trim($cache['website'][$item->platform]['app_domain'], '/');

                if (!isset($cache['column'][$item->column_tag])) {
                    // Store an empty array for unknown columns so the miss is
                    // memoised too and can be indexed safely.
                    $cache['column'][$item->column_tag] = AppNewsColumn::find($item->column_tag)?->toArray() ?: [];
                }
                $column = $cache['column'][$item->column_tag]['url'] ?? '';

                if (!isset($cache['column_2'][$item->second_column])) {
                    $cache['column_2'][$item->second_column] = AppNewsSecondColumn::find($item->second_column)?->toArray() ?: [];
                }
                $secondColumn = $cache['column_2'][$item->second_column]['url'] ?? '';

                var_dump("https://{$domain}{$column}{$secondColumn}/{$item->id}");
            } catch (\Throwable $exception) {
                var_dump($exception->getMessage());
            }
        }, 1);

        return;

        // NOTE(review): unreachable from here on because of the return above.
        // Experimental crawl of a column page, kept as-is.
        $columnUrl = '/pu=sz%401321_480/from=0/ssid=0/s?word=https%3A%2F%2Fhegsfc.com%2Fnews%2F155';
        list($res, $code) = $this->request($this->baseUrl . $columnUrl);
        preg_match_all('/window\.initialState=(\{.*\})/', $res, $matches);
        $articleList = json_decode(current(end($matches)), true);
        foreach ($articleList['information']['informationList']['itemList'] ?? [] as $item) {
            $itemId = $item['itemId'];
            $title = $item['templateMaterial']['widgetTitle'];
            var_dump($title);
            // var_dump($item);return;
            $url = $this->baseUrl . '/p/' . $itemId;
            var_dump($url);
            list($res, $code) = $this->request($url);
            sleep(2);
            (new Crawler($res))->filter('.articleDetailContent')->each(function ($node) {
                var_dump($node->html());
            });
            // var_dump($res);
            // var_dump($url);
            // return;
        }

        return;
        return;
        // foreach ($articleList as $item) {
        //     foreach ($item as $article) {
        //         try {
        //             if ($is = AppNews::query()->where('source_url', $article['docid'])->where('source_platform', self::PLATFORM)->exists()) {
        //                 continue;
        //             }
        //             $model = new AppNews();
        //             $model->title = $article['title'];
        //             $model->description = $article['digest'] ?: $article['title'];
        //             // Skip video-type items and items without an image
        //             if (!$article['imgsrc'] || (isset($article['skipType']) && $article['skipType'] == 'video')) {
        //                 continue;
        //             }
        //             $coverContent = file_get_contents($article['imgsrc']);
        //             $savePath = 'uploads/news/' . date('Y-m-d') . '/';
        //             if (!file_exists($savePath)) {
        //                 mkdir($savePath, 0777, true); // 0777 is the directory permission; true creates sub-directories recursively
        //             }
        //             $saveFile = $savePath . AppHelper::generateAid() . '.png';
        //             file_put_contents('/www/wwwroot/' . $saveFile, $coverContent);
        //             $model->cover = env('APP_DOMAIN', '') . '/' . $saveFile;
        //             $model->platform = 3;
        //             $model->source_url = $article['docid'];
        //             $model->source_platform = self::PLATFORM;
        //             $model->save();
        //         } catch (\Throwable $exception) {
        //             var_dump($exception->getMessage());
        //             continue;
        //         }
        //     }
        // }
    }

    /**
     * Lazily yield rows of the `app_brands` table, ordered by id.
     *
     * Only rows with id > 1 are returned. When $brand is truthy the query is
     * additionally restricted to that id (so passing 1 yields nothing).
     *
     * @param mixed $brand Brand id to restrict to, or a falsy value for all brands.
     *
     * @return \Generator Yields one cursor row per brand.
     */
    private function _getTask($brand): \Generator
    {
        $query = Db::table('app_brands');
        if ($brand) {
            $query->where(['id' => $brand]);
        }
        $query->where('id', '>', 1)->orderBy('id');

        foreach ($query->cursor() as $row) {
            yield $row;
        }
    }

    /**
     * Turn a name into a lower-case task name.
     *
     * Dots and spaces are each replaced by a hyphen before lower-casing,
     * e.g. "A.B C" becomes "a-b-c".
     *
     * @param mixed $name Name to normalise (used as a string).
     *
     * @return string The normalised name.
     */
    private function _getTaskName($name): string
    {
        return strtolower(strtr($name, [
            '.' => '-',
            ' ' => '-',
        ]));
    }

    public function spiderStart(): void { list($result, $httpCode) = $this->request($this->getBaseUrl() . '/fashion/street-style/'); preg_match_all('/