|
@@ -678,14 +678,39 @@ class User extends Controller
|
|
|
|
|
|
/**
 * Debug scraper for the e-cigarette-forum.com news board.
 *
 * Downloads the thread listing, extracts the thread path from every
 * `structItem-title` block, then fetches thread pages and dumps the
 * `bbWrapper` blocks that open with an absolute (http...) link.
 *
 * This method never returns normally: every path ends in dd() (presumably
 * Laravel's dump-and-die helper; it is not defined in this view) or in an
 * exception.
 *
 * @throws \RuntimeException When the thread listing cannot be downloaded.
 */
public function genArticles()
{
    $ecfHost = 'https://www.e-cigarette-forum.com';

    $listingHtml = file_get_contents($ecfHost . '/forums/e-cigarette-news.629/');
    if ($listingHtml === false) {
        // file_get_contents() signals failure with false; stop here instead of
        // running the regex over a non-string.
        throw new \RuntimeException('Unable to download the thread listing from ' . $ecfHost);
    }

    // Thread list matching. The capture group holds the thread path; [^"]+
    // keeps the match inside the uix-href attribute value.
    $preg = '/<div\s+class="structItem-title"\s+uix-href="(\/threads[^"]+\/)">/';
    $matches = [];
    preg_match_all($preg, $listingHtml, $matches);

    // Matches the link that opens the post body on a thread's detail page.
    $pregSon = '/<div\s+class="bbWrapper"><a\s+href="http\S+"/';

    // The first listing match is skipped, as the original code did.
    // NOTE(review): presumably a pinned/sticky thread - confirm.
    $threadPaths = array_slice($matches[1] ?? [], 1);

    foreach ($threadPaths as $threadPath) {
        // Throttle: wait one second before each thread request.
        sleep(1);

        $threadHtml = file_get_contents($ecfHost . $threadPath);
        if ($threadHtml === false) {
            // Skip threads that cannot be downloaded and try the next one.
            continue;
        }

        $matches1 = [];
        preg_match_all($pregSon, $threadHtml, $matches1);

        // Debug output: dumps the first fetched thread's matches and halts, so
        // the loop currently never reaches a second downloaded thread.
        dd($matches1[0]);
    }

    // Reached only when no thread page was dumped above.
    dd($matches);
}
|
|
|
|