Browse Source

爬新闻

zhangdehua 1 year ago
parent
commit
272c2c3c78
1 changed files with 29 additions and 4 deletions
  1. 29 4
      app/index/controller/User.php

+ 29 - 4
app/index/controller/User.php

@@ -678,14 +678,39 @@ class User extends Controller
 
     /**
      * Work-in-progress scraper for the e-cigarette-forum.com news board.
      *
      * Downloads the forum listing page, collects every
      * `<div class="structItem-title" uix-href="/threads...">` tag, then requests
      * each linked thread page in turn and looks for a `bbWrapper` div that opens
      * with an absolute (http...) link. The results are only handed to dd(); the
      * method has no return statement.
      *
      * NOTE(review): dd() is defined outside this view; if it halts the request
      * (the usual dump-and-die helper), the loop stops after its first iteration
      * and every statement after the loop is unreachable - confirm.
      */
     public function genArticles()
     {
+        $ecfHost = 'https://www.e-cigarette-forum.com';
         $f = file_get_contents('https://www.e-cigarette-forum.com/forums/e-cigarette-news.629/');
-        //$f = file_get_contents('https://www.e-cigarette-forum.com/threads/fda-commissioner-denigrates-tobacco-harm-reduction.984710/');
-        $preg = '/<div class=\"structItem-title\" uix-href=\".?/\">/';
+        //$f = '<div class="structItem-title" uix-href="/threads/will-not-stop-tobacco-control-unlikely-to-validate-nicotine-vaping-regwatch.983599/">';
+        // Match the thread-title tags on the listing page (".+" is greedy; assumes one such tag per line - TODO confirm)
+        $preg = '/<div\s+class="structItem-title"\s+uix-href="\/threads.+\/">/';
         $matches = [];
-        preg_match($preg,$f,$matches);
+        preg_match_all($preg, $f, $matches);
+        //dd($matches);

+        // Match the link inside the thread detail page
+        $pregSon = '/<div\s+class="bbWrapper"><a\s+href="http\S+"/';
+        $posts = $matches[0];
+        unset($posts[0]); // skip the first matched tag; NOTE(review): reason not visible here - presumably a pinned thread, confirm

+        foreach ($posts as $key => $match) {
+            $matches1 = [];
+            $ps = strpos($match, '/', 0); // offset of the first "/", which is where the uix-href path begins
+            //$pe = strrpos($match,'/',0);
+            $uixHref = substr($match, $ps, -2); // cut the two trailing characters (quote and bracket) so only the /threads/.../ path remains
+            sleep(1); // wait one second before each thread request
+            $f1 = file_get_contents($ecfHost . $uixHref);
+            //file_put_contents('b.html',$f1);
+            //dd($uixHref,$f1);
+            preg_match_all($pregSon, $f1, $matches1);
+            $postDetail = $matches[0][0]; // NOTE(review): never read below, and it indexes $matches (listing) rather than $matches1 - confirm intent
+            //$detail2 = file_get_contents($postDetail);


+            dd($matches1[0]);
+        }
         dd($matches);
 
-        file_put_contents('a.html',$f);
+        file_put_contents('a.html', $f);
         dd($f);
     }