curl -s https://laravel.build/my_scraping | bash
cd my_scraping/
vim my.cnf
[mysqld]
character-set-server = utf8mb4
collation-server = utf8mb4_general_ci
[client]
default-character-set=utf8mb4
vim docker-compose.yml
# ...
volumes:
- 'sailmysql:/var/lib/mysql'
- ./my.cnf:/etc/mysql/conf.d/my.cnf
./vendor/bin/sail up -d --build
docker ps
CONTAINER ID IMAGE COMMAND PORTS ...
67309004de4b sail-8.0/app "start-container" 0.0.0.0:80->80/tcp, 8000/tcp ...
aabaeb2f98dc selenium/standalone-chrome "/opt/bin/entry_poin…" 4444/tcp ...
17e6f413e14b getmeili/meilisearch:latest "tini -- /bin/sh -c …" 0.0.0.0:7700->7700/tcp ...
06eef68262bc mailhog/mailhog:latest "MailHog" 0.0.0.0:1025->1025/tcp, 0.0.0.0:8025->8025/tcp ...
63dfe21f304a redis:alpine "docker-entrypoint.s…" 0.0.0.0:6379->6379/tcp ...
cfbf2e6c3ee4 mysql:8.0 "docker-entrypoint.s…" 0.0.0.0:3306->3306/tcp, 33060/tcp ...
###
./vendor/bin/sail composer require weidner/goutte
vim config/app.php
return [
// ...
'providers' => [
// ...
Weidner\Goutte\GoutteServiceProvider::class,
],
// ...
'aliases' => [
// ...
'Goutte' => Weidner\Goutte\GoutteFacade::class,
],
];
./vendor/bin/sail php artisan make:command ScrapeMy
vim app/Console/Commands/ScrapeMy.php
// ...
protected $signature = 'scrape:my';
protected $description = 'Scrape My';
// ...
public function handle()
{
$crawler = \Goutte::request('GET', 'https://yemaosheng.com/');
$crawler->filter('article > header > h1')->each(function ($node) {
dump($node->text());
});
}
// ...
./vendor/bin/sail artisan list
scrape
scrape:my Scrape My
./vendor/bin/sail artisan scrape:my
"今日は永住権を取得しました"
"小鳥の保温について"
...
#
php artisan make:model MyUrls --migration
Model created successfully.
Created Migration: 2021_07_02_053959_create_my_urls_table
vim database/migrations/2021_07_02_053959_create_my_urls_table.php
// ...
public function up()
{
Schema::create('my_urls', function (Blueprint $table) {
$table->id();
$table->string('url');
$table->timestamps();
});
}
// ...
./vendor/bin/sail artisan migrate
###
vim app/Console/Commands/ScrapeMy.php
// ...
use Carbon\Carbon;
use Illuminate\Support\Facades\DB;
// ...
public function handle()
{
$this->truncateTables();
$this->saveUrls();
}
private function truncateTables(){
DB::table('my_urls')->truncate();
}
private function saveUrls(){
$url = 'https://tenshoku.my.jp/list/kwphp/pg2/';
$crawler = \Goutte::request('GET', $url);
$urls = $crawler->filter('.cassetteRecruit__copy > a')->each
(function ($node) {
$href = $node->attr('href');
return [
'url' => substr($href, 0, strpos($href,'/', 1)+1),
'created_at' => Carbon::now(),
'updated_at' => Carbon::now(),
];
});
DB::table('my_urls')->insert($urls);
}
./vendor/bin/sail artisan scrape:my
###
./vendor/bin/sail artisan make:model MyJobs --migration
Model created successfully.
Created Migration: 2021_07_02_060002_create_my_jobs_table
vim database/migrations/2021_07_02_060002_create_my_jobs_table.php
// ...
public function up()
{
Schema::create('my_jobs', function (Blueprint $table) {
$table->id();
$table->string('url');
$table->string('title');
$table->string('company_name');
$table->text('features');
$table->timestamps();
});
}
// ...
./vendor/bin/sail artisan migrate
Migrating: 2021_07_02_060002_create_my_jobs_table
Migrated: 2021_07_02_060002_create_my_jobs_table (52.56ms)
vim app/Console/Commands/ScrapeMy.php
<?php
namespace App\Console\Commands;
use App\Models\MynaviUrls;
use App\Models\MynaviJobs;
use Illuminate\Console\Command;
use Carbon\Carbon;
use Illuminate\Support\Facades\DB;
class ScrapeMynavi extends Command
{
const HOST = 'https://tenshoku.mynavi.jp';
const FILE_PATH = 'app/mynavi_jobs.csv';
const PAGE_NUM = 1;
protected $signature = 'scrape:mynavi';
protected $description = 'Scrape Mynavi';
public function __construct()
{
parent::__construct();
}
public function handle()
{
$this->truncateTables();
$this->saveUrls();
$this->saveJobs();
$this->exportCSV();
}
private function truncateTables(){
DB::table('mynavi_urls')->truncate();
DB::table('mynavi_jobs')->truncate();
}
private function saveUrls(){
foreach(range(1,$this::PAGE_NUM) as $num)
{
$url = $this::HOST.'/list/kwphp/pg' . $num . '/';
$crawler = \Goutte::request('GET', $url);
$urls = $crawler->filter('.cassetteRecruit__copy > a')->each
(function ($node) {
$href = $node->attr('href');
return [
'url' => substr($href, 0, strpos($href,'/', 1)+1),
'created_at' => Carbon::now(),
'updated_at' => Carbon::now(),
];
});
DB::table('mynavi_urls')->insert($urls);
//sleep(5);
}
}
private function saveJobs(){
foreach(MynaviUrls::all() as $mynaviUrl){
$url = $this::HOST.$mynaviUrl->url;
$crawler = \Goutte::request('GET', $url);
MynaviJobs::create([
'url' => $url,
'title'=> $this->getTitle($crawler),
'company_name' => $this->getCompany($crawler),
'features' => $this->getFeatures($crawler),
'created_at' => Carbon::now(),
'updated_at' => Carbon::now(),
]);
sleep(1);
}
}
private function getTitle($crawler){
return $crawler->filter('.occName')->text();
}
private function getCompany($crawler){
return $crawler->filter('.companyName')->text();
}
private function getFeatures($crawler){
$features = $crawler->filter('.cassetteRecruit__attribute > li > span')->each(function($node){
return $node->text();
});
return implode(' && ', $features);
}
private function exportCSV(){
$file = fopen(storage_path($this::FILE_PATH), 'w');
if(!$file){
throw new \Exception('ファイルを作成に失敗しました!');
}
if(!fputcsv($file,['id','url','title','company_name','features'])){
throw new \Exception('ヘッダの書き込みに失敗しました!');
}
foreach(MynaviJobs::all() as $job){
if(!fputcsv($file,[$job->id,$job->url,$job->title,$job->company_name,$job->features])){
throw new \Exception('CSVファイルの書き込みに失敗しました!');
}
}
fclose($file);
}
} |