# Run the laravel.build installer script for a project named "my_scraping".
curl -s https://laravel.build/my_scraping | bash
cd my_scraping/

# MySQL settings: utf8mb4 character set for the server and for clients.
vim my.cnf
[mysqld]
character-set-server = utf8mb4
collation-server = utf8mb4_general_ci
[client]
default-character-set=utf8mb4

# Bind-mount my.cnf into /etc/mysql/conf.d/ next to the sailmysql data volume
# (presumably under the mysql service; the surrounding YAML is elided -- confirm).
vim docker-compose.yml
# ...
volumes:
    - 'sailmysql:/var/lib/mysql'
    - ./my.cnf:/etc/mysql/conf.d/my.cnf

# Build and start the containers in the background, then list what is running.
./vendor/bin/sail up -d --build
docker ps
CONTAINER ID IMAGE COMMAND PORTS ...
67309004de4b sail-8.0/app "start-container" 0.0.0.0:80->80/tcp, 8000/tcp ...
aabaeb2f98dc selenium/standalone-chrome "/opt/bin/entry_poin…" 4444/tcp ...
17e6f413e14b getmeili/meilisearch:latest "tini -- /bin/sh -c …" 0.0.0.0:7700->7700/tcp ...
06eef68262bc mailhog/mailhog:latest "MailHog" 0.0.0.0:1025->1025/tcp, 0.0.0.0:8025->8025/tcp ...
63dfe21f304a redis:alpine "docker-entrypoint.s…" 0.0.0.0:6379->6379/tcp ...
cfbf2e6c3ee4 mysql:8.0 "docker-entrypoint.s…" 0.0.0.0:3306->3306/tcp, 33060/tcp ...

###
# Install weidner/goutte, then register its service provider and the "Goutte" facade alias.
./vendor/bin/sail composer require weidner/goutte
vim config/app.php
return [
    // ...
    'providers' => [
        // ...
        Weidner\Goutte\GoutteServiceProvider::class,
    ],
    // ...
    'aliases' => [
        // ...
        'Goutte' => Weidner\Goutte\GoutteFacade::class,
    ],
];

# Generate the ScrapeMy console command, then edit its signature, description and handler.
./vendor/bin/sail php artisan make:command ScrapeMy
vim app/Console/Commands/ScrapeMy.php
// ...
protected $signature = 'scrape:my';
protected $description = 'Scrape My';
// ...
/**
 * Request https://yemaosheng.com/ and dump the text of every node matched by
 * the "article > header > h1" selector.
 */
public function handle()
{
    $crawler = \Goutte::request('GET', 'https://yemaosheng.com/');
    $crawler->filter('article > header > h1')->each(function ($node) {
        dump($node->text());
    });
}
// ...

# Check that the command is listed, then run it; the quoted lines are its dump() output.
./vendor/bin/sail artisan list scrape
scrape:my Scrape My
./vendor/bin/sail artisan scrape:my
"今日は永住権を取得しました"
"小鳥の保温について"
...

# Create the MyUrls model together with its migration. The leading "#" on the next
# line belongs to the original transcript (presumably a root shell prompt -- confirm).
# php artisan make:model MyUrls --migration
Model created successfully.
Created Migration: 2021_07_02_053959_create_my_urls_table
vim database/migrations/2021_07_02_053959_create_my_urls_table.php
// ...
/**
 * Create the my_urls table with an id column, a string "url" column and timestamps.
 */
public function up()
{
    Schema::create('my_urls', function (Blueprint $table) {
        $table->id();
        $table->string('url');
        $table->timestamps();
    });
}
// ...
./vendor/bin/sail artisan migrate

###
vim app/Console/Commands/ScrapeMy.php
// ...
use Carbon\Carbon;
use Illuminate\Support\Facades\DB;
// ...
/**
 * Entry point of the command: empty my_urls, then refill it from the listing page.
 */
public function handle()
{
    $this->truncateTables();
    $this->saveUrls();
}

/**
 * Truncate my_urls so that every run starts from an empty table.
 */
private function truncateTables()
{
    DB::table('my_urls')->truncate();
}

/**
 * Fetch one listing page and store the leading path segment of each job link
 * (for example "/jobinfo-xxx/") in my_urls.
 *
 * Anchors without an href are skipped. An href with no second "/" is stored
 * unchanged instead of being cut down to its first character.
 */
private function saveUrls()
{
    // Same host as the HOST constant used by the final version of this command.
    $url = 'https://tenshoku.mynavi.jp/list/kwphp/pg2/';
    $crawler = \Goutte::request('GET', $url);
    $now = Carbon::now();

    $rows = $crawler->filter('.cassetteRecruit__copy > a')->each(function ($node) use ($now) {
        $href = (string) $node->attr('href');
        if ($href === '') {
            // strpos() with offset 1 throws a ValueError on an empty string (PHP 8).
            return null;
        }

        // Position of the slash that closes the first path segment, if any.
        $end = strpos($href, '/', 1);

        return [
            'url' => $end === false ? $href : substr($href, 0, $end + 1),
            'created_at' => $now,
            'updated_at' => $now,
        ];
    });

    // Drop the skipped anchors and reindex so insert() receives a plain list of rows.
    $rows = array_values(array_filter($rows));
    if ($rows !== []) {
        DB::table('my_urls')->insert($rows);
    }
}

./vendor/bin/sail artisan scrape:my

###
./vendor/bin/sail artisan make:model MyJobs --migration
Model created successfully.
Created Migration: 2021_07_02_060002_create_my_jobs_table
vim database/migrations/2021_07_02_060002_create_my_jobs_table.php
// ...
/**
 * Create the my_jobs table: id, url, title, company_name (strings),
 * features (text) and timestamps.
 */
public function up()
{
    Schema::create('my_jobs', function (Blueprint $table) {
        $table->id();
        $table->string('url');
        $table->string('title');
        $table->string('company_name');
        $table->text('features');
        $table->timestamps();
    });
}
// ...
./vendor/bin/sail artisan migrate
Migrating: 2021_07_02_060002_create_my_jobs_table
Migrated: 2021_07_02_060002_create_my_jobs_table (52.56ms)

vim app/Console/Commands/ScrapeMy.php
<?php

namespace App\Console\Commands;

use App\Models\MyJobs;
use App\Models\MyUrls;
use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;

/**
 * Artisan command "scrape:my": collects job URLs from the listing pages,
 * scrapes each job page into my_jobs and exports the result as a CSV file.
 *
 * The class name matches its file (app/Console/Commands/ScrapeMy.php), and the
 * models and tables are the ones created by the make:model / migrate steps
 * (MyUrls / MyJobs, my_urls / my_jobs).
 */
class ScrapeMy extends Command
{
    /** Scheme and host that every scraped path is appended to. */
    public const HOST = 'https://tenshoku.mynavi.jp';

    /** CSV destination, relative to storage_path(). */
    public const FILE_PATH = 'app/mynavi_jobs.csv';

    /** Number of listing pages to crawl, starting from page 1. */
    public const PAGE_NUM = 1;

    protected $signature = 'scrape:my';

    protected $description = 'Scrape My';

    /**
     * Run the whole pipeline: reset tables, collect URLs, scrape jobs, export CSV.
     */
    public function handle()
    {
        $this->truncateTables();
        $this->saveUrls();
        $this->saveJobs();
        $this->exportCSV();
    }

    /**
     * Empty both working tables so each run starts from scratch.
     */
    private function truncateTables(): void
    {
        DB::table('my_urls')->truncate();
        DB::table('my_jobs')->truncate();
    }

    /**
     * Crawl listing pages 1..PAGE_NUM and store one row per job link in my_urls.
     */
    private function saveUrls(): void
    {
        foreach (range(1, self::PAGE_NUM) as $num) {
            $url = self::HOST . '/list/kwphp/pg' . $num . '/';
            $crawler = \Goutte::request('GET', $url);
            $now = Carbon::now();

            $rows = $crawler->filter('.cassetteRecruit__copy > a')->each(function ($node) use ($now) {
                $path = $this->jobPathFromHref($node->attr('href'));
                if ($path === null) {
                    return null;
                }

                return [
                    'url' => $path,
                    'created_at' => $now,
                    'updated_at' => $now,
                ];
            });

            // Drop anchors without a usable href and reindex for insert().
            $rows = array_values(array_filter($rows));
            if ($rows !== []) {
                DB::table('my_urls')->insert($rows);
            }

            // When PAGE_NUM is raised, consider pausing here between listing pages.
        }
    }

    /**
     * Reduce a link href to its leading path segment, e.g. "/jobinfo-1/?x=1" -> "/jobinfo-1/".
     *
     * @param string|null $href raw href attribute; null when the anchor has none
     * @return string|null the segment including its closing slash, the whole href
     *                     when it has no second slash, or null when href is missing/empty
     */
    private function jobPathFromHref(?string $href): ?string
    {
        if ($href === null || $href === '') {
            // strpos() with offset 1 throws a ValueError on an empty string (PHP 8).
            return null;
        }

        $end = strpos($href, '/', 1);

        // false + 1 would silently become 1 and truncate the href to "/".
        return $end === false ? $href : substr($href, 0, $end + 1);
    }

    /**
     * Fetch every stored URL and save title, company name and features to my_jobs.
     *
     * A page that lacks one of the expected elements is reported and skipped,
     * so a single broken page does not abort the run.
     */
    private function saveJobs(): void
    {
        foreach (MyUrls::all() as $myUrl) {
            $url = self::HOST . $myUrl->url;

            try {
                $crawler = \Goutte::request('GET', $url);

                // NOTE(review): create() uses mass assignment; the MyJobs model is not
                // shown here -- confirm its $fillable/$guarded permits these attributes.
                MyJobs::create([
                    'url' => $url,
                    'title' => $this->getTitle($crawler),
                    'company_name' => $this->getCompany($crawler),
                    'features' => $this->getFeatures($crawler),
                    'created_at' => Carbon::now(),
                    'updated_at' => Carbon::now(),
                ]);
            } catch (\InvalidArgumentException $e) {
                // DomCrawler's text() throws this when the selector matched nothing.
                $this->warn('Skipped ' . $url . ': ' . $e->getMessage());
            }

            // Pause between job pages, including after a skipped one.
            sleep(1);
        }
    }

    /**
     * @return string text of the first ".occName" element
     */
    private function getTitle($crawler): string
    {
        return $crawler->filter('.occName')->text();
    }

    /**
     * @return string text of the first ".companyName" element
     */
    private function getCompany($crawler): string
    {
        return $crawler->filter('.companyName')->text();
    }

    /**
     * @return string texts of all feature spans joined with " && " ('' when none match)
     */
    private function getFeatures($crawler): string
    {
        $features = $crawler->filter('.cassetteRecruit__attribute > li > span')->each(function ($node) {
            return $node->text();
        });

        return implode(' && ', $features);
    }

    /**
     * Write a header row and every my_jobs row to storage_path(FILE_PATH) as CSV.
     *
     * @throws \Exception when the file cannot be opened or a row cannot be written
     */
    private function exportCSV(): void
    {
        $file = fopen(storage_path(self::FILE_PATH), 'w');
        if (!$file) {
            throw new \Exception('ファイルを作成に失敗しました!');
        }

        try {
            if (fputcsv($file, ['id', 'url', 'title', 'company_name', 'features']) === false) {
                throw new \Exception('ヘッダの書き込みに失敗しました!');
            }

            foreach (MyJobs::all() as $job) {
                $row = [$job->id, $job->url, $job->title, $job->company_name, $job->features];
                if (fputcsv($file, $row) === false) {
                    throw new \Exception('CSVファイルの書き込みに失敗しました!');
                }
            }
        } finally {
            // Close the handle even when a write failed.
            fclose($file);
        }
    }
}