Explorar el Código

fake articles

541469799@qq.com hace 1 año
padre
commit
2775ff989f
Se ha modificado 1 fichero con 72 adiciones y 7 borrados
  1. 72 7
      app/index/controller/User.php

+ 72 - 7
app/index/controller/User.php

@@ -685,30 +685,95 @@ class User extends Controller
         $preg = '/<div\s+class="structItem-title"\s+uix-href="\/threads.+\/">/';
         $matches = [];
         preg_match_all($preg, $f, $matches);
+        if (empty($matches[0])){
+            return $this->renderSuccess();
+        }
+
         //dd($matches);
 
         //匹配详情页的里面的链接
         $pregSon = '/<div\s+class="bbWrapper"><a\s+href="http\S+"/';
-        $posts = $matches[0];
-        unset($posts[0]);
+        $pregSon1 = '/<div\s+class="bbWrapper"><div>\s+<a\s+href="http\S+"/';
+        $posts = array_slice($matches[0],2,3);
+        //dd($posts);
+
+        //$titleStr = 'rel="nofollow ugc noopener">Scoring Endgame | Former CDC Tobacco Control Director Talks Battle Over Vaping | RegWatch</a>';
+        $pregTitleSon = '/rel="nofollow\s+ugc\s+noopener">.+<\/a>/';
+        $articles = [];
 
         foreach ($posts as $key => $match) {
             $matches1 = [];
+            $matches2 = [];
             $ps = strpos($match, '/', 0);
+            if ($ps === false) {
+                continue;
+            }
             //$pe = strrpos($match,'/',0);
             $uixHref = substr($match, $ps, -2);
+            //dd($uixHref);
             sleep(1);
             $f1 = file_get_contents($ecfHost . $uixHref);
             //file_put_contents('b.html',$f1);
+
+            //dd('hahaha');
+            if (empty($f1)) {
+                continue;
+            }
+            //file_put_contents('b.html',$f1);
             //dd($uixHref,$f1);
-            preg_match_all($pregSon, $f1, $matches1);
-            $postDetail = $matches[0][0];
-            //$detail2 = file_get_contents($postDetail);
+            //todo
+            preg_match($pregSon, $f1, $matches1);
+            //dd($matches1);
+            if (empty($matches1)){
+                //dd('ooo');
+                preg_match($pregSon1, $f1, $matches1);
+                //dd($matches1);
+                if (empty($matches1)){
+                    continue;
+                }
+            }
+            //文章详情的原始链接
+            $postDetailOriginal = $matches1[0];
 
+            $ps3 = strpos($postDetailOriginal,'f',0);
+            if ($ps3 === false){
+                continue;
+            }
+            $postDetailOriginal = substr($postDetailOriginal,$ps3 + 3,-1);
+            //dd($postDetailOriginal);
+
+            //文章的标题
+            preg_match($pregTitleSon, $f1, $matches2);
+            dd($matches2);
+            $postTitle = $matches2[0];
+            $ps1 = strpos($postTitle, '>', 0);
+            if ($ps1 === false) {
+                continue;
+            }
+            $title = substr($postTitle, $ps1 + 1, -4);
+
+            $article = [
+                "title" => $title,
+                "show_type" => 10,
+                "category_id" => 10002,
+                "image_id" => 0,
+                "content" => '<a href="' . $postDetailOriginal . '">' . $title . '</a>',
+                "sort" => 100,
+                'status' => 1,
+                "virtual_views" => rand(100, 500),
+                "actual_views" => rand(100, 500),
+                "is_delete" => 0,
+                "store_id" => 10001,
+                "create_time" => time(),
+                "update_time" => time(),
+            ];
+
+            $articles[] = $article;
 
-            dd($matches1[0]);
+            //$detail2 = file_get_contents($postDetail);
+            //dd($matches1[0]);
         }
-        dd($matches);
+        dd($articles);
 
         file_put_contents('a.html', $f);
         dd($f);