update
This commit is contained in:
147
app/Command/sitemap/Toutiao.php
Normal file
147
app/Command/sitemap/Toutiao.php
Normal file
@ -0,0 +1,147 @@
|
||||
<?php
|
||||
|
||||
namespace App\Command\sitemap;
|
||||
|
||||
use App\Command\spider\BaseSpider;
|
||||
use App\Model\AppNews;
|
||||
use App\Model\AppWebsiteConfig;
|
||||
use Hyperf\Command\Annotation\Command;
|
||||
use Hyperf\DbConnection\Db;
|
||||
use Hyperf\Logger\LoggerFactory;
|
||||
use Psr\Container\ContainerInterface;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use function Swoole\Coroutine\run;
|
||||
|
||||
/**
 * Console command that generates the sitemap.xml file for Toutiao.
 *
 * The command reads every non-deleted AppNews row, renders one <url> entry
 * per article under the domain configured for the requested platform, and
 * writes the resulting document to SITEMAP_PATH.
 */
#[Command]
class Toutiao extends BaseSpider
{
    /**
     * NOTE(review): this URL embeds an API token in source control; consider
     * moving it to configuration. Presumably exposed through getBaseUrl() of
     * the parent class - confirm.
     *
     * @var string
     */
    protected string $baseUrl = 'http://data.zz.baidu.com/urls?site=https://www.jiangsucf.com&token=5CtAd8vD3QQazJjh';

    protected const PLATFORM = 'elle-street';

    /** Absolute path of the generated sitemap file. */
    private const SITEMAP_PATH = '/www/wwwroot/huoerguosifeichi/sitemap.xml';

    /** XML namespace required on <urlset> by the sitemap protocol. */
    private const SITEMAP_XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9';

    /** Pattern capturing the body of the page's JSON-LD <script> element. */
    private const JSON_LD_PATTERN = '/<script id="json-ld" type="application\/ld\+json">([\s\S]*?)<\/script>/';

    public function __construct(protected ContainerInterface $container, LoggerFactory $loggerFactory)
    {
        parent::__construct('sitemap:toutiao');
    }

    /**
     * Registers the command description and the --platform / -p option.
     */
    public function configure()
    {
        parent::configure();
        // General inclusion
        // Usage notes
        // 1. The general-inclusion tool lets a site actively push resources to Baidu search, shortening the time
        //    crawlers need to discover the site's links; it does not guarantee indexing or display.
        // 2. API submission and manual submission share one quota; the sitemap submission quota is not shared with
        //    other methods. The effective quota is the one shown on the site page. Quota does not accumulate and is
        //    only valid for the current day.
        // 3. If a link redirects, submit the final target link. If the site changes its domain, submit the resources
        //    of the new domain; for pages migrated to HTTPS, submit the HTTPS resources.
        $this->setDescription('生成头条sitemap');
        $this->addOption('platform', 'p', InputOption::VALUE_OPTIONAL, '指定的平台id.', false);
    }

    /**
     * Builds the sitemap for the platform given by --platform and writes it to disk.
     *
     * @return int 0 on success (including "nothing to write"), 1 when the
     *             platform is missing/unknown or the file cannot be written
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $platform = $input->getOption('platform');

        // The option defaults to false (not passed) and is null when passed
        // without a value; neither can identify a website configuration.
        if ($platform === false || $platform === null || $platform === '') {
            $output->writeln('<error>The --platform option is required.</error>');
            return 1;
        }

        // Domain of the target website. find() yields null for an unknown id,
        // so the property must not be dereferenced unconditionally.
        $domain = AppWebsiteConfig::find($platform)?->app_domain;
        if (!is_string($domain) || $domain === '') {
            $output->writeln('<error>No website domain is configured for the given platform.</error>');
            return 1;
        }

        $rows = AppNews::query()
            ->select(['id', 'created_at'])
            ->where('is_delete', 0)
            ->get()
            ->toArray();

        $entries = [];
        foreach ($rows as $row) {
            $entries[] = $this->_buildUrlEntry($domain, $row['id'], $row['created_at'] ?? null);
        }

        // Nothing to publish: leave any previously generated file untouched.
        if (!$entries) {
            return 0;
        }

        $sitemap = implode("\n", [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<urlset xmlns="' . self::SITEMAP_XMLNS . '">',
            implode(PHP_EOL, $entries),
            '</urlset>',
        ]);

        if (file_put_contents(self::SITEMAP_PATH, $sitemap) === false) {
            $output->writeln('<error>Unable to write sitemap to ' . self::SITEMAP_PATH . '</error>');
            return 1;
        }

        return 0;
    }

    /**
     * Renders a single <url> element for one news article.
     *
     * @param string     $domain    host name the article is served from (unescaped)
     * @param int|string $id        article id used in the /news/{id} path
     * @param mixed      $createdAt raw created_at value of the article row
     */
    private function _buildUrlEntry(string $domain, int|string $id, mixed $createdAt): string
    {
        // Escape the whole location so characters such as "&" cannot break the XML.
        $loc = htmlspecialchars("https://{$domain}/news/{$id}", ENT_XML1 | ENT_QUOTES, 'UTF-8');

        $lines = ['<url>', "<loc>{$loc}</loc>"];

        // <lastmod> is optional in the protocol; omit it rather than emit an invalid date.
        $lastmod = $this->_formatLastmod($createdAt);
        if ($lastmod !== null) {
            $lines[] = "<lastmod>{$lastmod}</lastmod>";
        }

        $lines[] = '</url>';

        return implode("\n", $lines);
    }

    /**
     * Normalises a creation time to the W3C date form (YYYY-MM-DD) used by <lastmod>.
     *
     * Accepts a DateTimeInterface, a Unix timestamp (int or all-digit string)
     * or any string understood by strtotime().
     *
     * @return string|null the formatted date, or null when the value cannot be interpreted
     */
    private function _formatLastmod(mixed $value): ?string
    {
        if ($value instanceof \DateTimeInterface) {
            return $value->format('Y-m-d');
        }

        if (is_int($value) || (is_string($value) && ctype_digit($value))) {
            $timestamp = (int) $value;
        } elseif (is_string($value) && $value !== '') {
            $timestamp = strtotime($value);
        } else {
            return null;
        }

        if ($timestamp === false || $timestamp <= 0) {
            return null;
        }

        return date('Y-m-d', $timestamp);
    }

    /**
     * Lazily yields rows of the app_brands table with id > 1, ordered by id.
     *
     * @param mixed $brand when truthy, restricts the result to this brand id
     */
    private function _getTask($brand): \Generator
    {
        $query = Db::table('app_brands');
        if ($brand) {
            $query->where(['id' => $brand]);
        }

        $query->where('id', '>', 1)->orderBy('id');
        foreach ($query->cursor() as $row) {
            yield $row;
        }
    }

    /**
     * Lower-cases a name and replaces dots and spaces with hyphens.
     */
    private function _getTaskName($name): string
    {
        return strtolower(strtr($name, [
            '.' => '-',
            ' ' => '-',
        ]));
    }

    /**
     * Extracts and decodes the first JSON-LD script block of an HTML page.
     *
     * @return array|null the decoded document, or null when the block is
     *                    absent or does not decode to an array
     */
    private function _extractJsonLd(mixed $html): ?array
    {
        if (!is_string($html) || preg_match(self::JSON_LD_PATTERN, $html, $match) !== 1) {
            return null;
        }

        $decoded = json_decode($match[1], true);

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Fetches the street-style listing page, follows every listed article and
     * dumps the image URLs found in the articles' JSON-LD metadata.
     */
    public function spiderStart(): void
    {
        [$result] = $this->request($this->getBaseUrl() . '/fashion/street-style/');

        // preg_match_all() always fills $matches with an array, so the absence
        // of the script block has to be detected on the captured content itself.
        $val = $this->_extractJsonLd($result);
        if ($val === null) {
            $this->logger->info(self::getPlatform() . " 数据获取失败。");
            return;
        }

        $articles = $val[0]['itemListElement'] ?? [];

        if (!$articles || !is_array($articles)) {
            $this->logger->info(self::getPlatform() . " 文章数据获取失败。");
            return;
        }

        $saveImages = [];
        foreach ($articles as $article) {
            if (!isset($article['url'])) {
                continue;
            }

            [$result] = $this->request($article['url']);

            $val = $this->_extractJsonLd($result);
            $images = $val['about']['itemListElement'] ?? [];
            if (!is_array($images)) {
                continue;
            }

            foreach ($images as $image) {
                if (isset($image['item']['image'])) {
                    $saveImages[] = $image['item']['image'];
                }
            }
        }

        // NOTE(review): the collected images are only dumped, never persisted;
        // kept as-is because it is the method's sole observable output.
        var_dump($saveImages);
    }
}
|
Reference in New Issue
Block a user