|
@@ -685,30 +685,95 @@ class User extends Controller
|
|
|
$preg = '/<div\s+class="structItem-title"\s+uix-href="\/threads.+\/">/';
|
|
|
$matches = [];
|
|
|
preg_match_all($preg, $f, $matches);
|
|
|
+ if (empty($matches[0])){
|
|
|
+ return $this->renderSuccess();
|
|
|
+ }
|
|
|
+
|
|
|
//dd($matches);
|
|
|
|
|
|
//匹配详情页的里面的链接
|
|
|
$pregSon = '/<div\s+class="bbWrapper"><a\s+href="http\S+"/';
|
|
|
- $posts = $matches[0];
|
|
|
- unset($posts[0]);
|
|
|
+ $pregSon1 = '/<div\s+class="bbWrapper"><div>\s+<a\s+href="http\S+"/';
|
|
|
+ $posts = array_slice($matches[0],2,3);
|
|
|
+ //dd($posts);
|
|
|
+
|
|
|
+ //$titleStr = 'rel="nofollow ugc noopener">Scoring Endgame | Former CDC Tobacco Control Director Talks Battle Over Vaping | RegWatch</a>';
|
|
|
+ $pregTitleSon = '/rel="nofollow\s+ugc\s+noopener">.+<\/a>/';
|
|
|
+ $articles = [];
|
|
|
|
|
|
foreach ($posts as $key => $match) {
|
|
|
$matches1 = [];
|
|
|
+ $matches2 = [];
|
|
|
$ps = strpos($match, '/', 0);
|
|
|
+ if ($ps === false) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
//$pe = strrpos($match,'/',0);
|
|
|
$uixHref = substr($match, $ps, -2);
|
|
|
+ //dd($uixHref);
|
|
|
sleep(1);
|
|
|
$f1 = file_get_contents($ecfHost . $uixHref);
|
|
|
//file_put_contents('b.html',$f1);
|
|
|
+
|
|
|
+ //dd('hahaha');
|
|
|
+ if (empty($f1)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ //file_put_contents('b.html',$f1);
|
|
|
//dd($uixHref,$f1);
|
|
|
- preg_match_all($pregSon, $f1, $matches1);
|
|
|
- $postDetail = $matches[0][0];
|
|
|
- //$detail2 = file_get_contents($postDetail);
|
|
|
+ //todo
|
|
|
+ preg_match($pregSon, $f1, $matches1);
|
|
|
+ //dd($matches1);
|
|
|
+ if (empty($matches1)){
|
|
|
+ //dd('ooo');
|
|
|
+ preg_match($pregSon1, $f1, $matches1);
|
|
|
+ //dd($matches1);
|
|
|
+ if (empty($matches1)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //文章详情的原始链接
|
|
|
+ $postDetailOriginal = $matches1[0];
|
|
|
|
|
|
+ $ps3 = strpos($postDetailOriginal,'f',0);
|
|
|
+ if ($ps3 === false){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ $postDetailOriginal = substr($postDetailOriginal,$ps3 + 3,-1);
|
|
|
+ //dd($postDetailOriginal);
|
|
|
+
|
|
|
+ //文章的标题
|
|
|
+ preg_match($pregTitleSon, $f1, $matches2);
|
|
|
+ dd($matches2);
|
|
|
+ $postTitle = $matches2[0];
|
|
|
+ $ps1 = strpos($postTitle, '>', 0);
|
|
|
+ if ($ps1 === false) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ $title = substr($postTitle, $ps1 + 1, -4);
|
|
|
+
|
|
|
+ $article = [
|
|
|
+ "title" => $title,
|
|
|
+ "show_type" => 10,
|
|
|
+ "category_id" => 10002,
|
|
|
+ "image_id" => 0,
|
|
|
+ "content" => '<a href="' . $postDetailOriginal . '">' . $title . '</a>',
|
|
|
+ "sort" => 100,
|
|
|
+ 'status' => 1,
|
|
|
+ "virtual_views" => rand(100, 500),
|
|
|
+ "actual_views" => rand(100, 500),
|
|
|
+ "is_delete" => 0,
|
|
|
+ "store_id" => 10001,
|
|
|
+ "create_time" => time(),
|
|
|
+ "update_time" => time(),
|
|
|
+ ];
|
|
|
+
|
|
|
+ $articles[] = $article;
|
|
|
|
|
|
- dd($matches1[0]);
|
|
|
+ //$detail2 = file_get_contents($postDetail);
|
|
|
+ //dd($matches1[0]);
|
|
|
}
|
|
|
- dd($matches);
|
|
|
+ dd($articles);
|
|
|
|
|
|
file_put_contents('a.html', $f);
|
|
|
dd($f);
|