Skip to content
124 changes: 74 additions & 50 deletions src/app/Console/Commands/AlgoliaImportWorldHeritages.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,19 @@

namespace App\Console\Commands;

use Illuminate\Console\Command;
use App\Models\WorldHeritage;
use Algolia\AlgoliaSearch\Api\SearchClient;

use App\Models\WorldHeritage;
use Illuminate\Console\Command;

class AlgoliaImportWorldHeritages extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'algolia:import-world-heritages
{--chunk=500}
{--truncate}
{--dry-run}';

/**
* The console command description.
*
* @var string
*/
protected $description = 'Upsert world heritages into Algolia index.';

/**
* Execute the console command.
*/
public function handle(): int
{
$appId = config('algolia.algolia_app_id');
Expand All @@ -40,13 +26,13 @@ public function handle(): int
return self::FAILURE;
}

$chunk = max(1, (int)$this->option('chunk'));
$dryRun = (bool)$this->option('dry-run');
$truncate = (bool)$this->option('truncate');
$chunk = max(1, (int) $this->option('chunk'));
$dryRun = (bool) $this->option('dry-run');
$truncate = (bool) $this->option('truncate');
$processed = 0;

$client = SearchClient::create($appId, $apiKey);

// With --truncate, clear the index before importing (under --dry-run this is only announced).
if ($truncate) {
if ($dryRun) {
$this->info('[dry-run] would clear index');
Expand All @@ -64,54 +50,90 @@ public function handle(): int

WorldHeritage::query()
->with([
'thumbnail',
'countries' => function ($query) {
$query->select(['countries.state_party_code', 'countries.name_jp']);
},
'countries',
])
->select([
'world_heritage_sites.id',
'official_name',
'name',
'world_heritage_sites.official_name',
'world_heritage_sites.name',
'world_heritage_sites.name_jp',
'world_heritage_sites.region',
'country',
'category',
'year_inscribed',
'is_endangered',
'image_url',
'world_heritage_sites.study_region',
'world_heritage_sites.category',
'world_heritage_sites.year_inscribed',
'world_heritage_sites.is_endangered',
'world_heritage_sites.image_url',
])
->chunkById($chunk, function ($rows) use ($client, $indexName, $dryRun, &$processed) {
$objects = [];

foreach ($rows as $row) {

$statePartyCodes = $row->countries->pluck('state_party_code')->toArray();
$countryNamesJp = $row->countries->pluck('name_jp')->toArray();
$countries = $row->countries
->filter(fn ($country) => $country->state_party_code !== null)
->values();

$statePartyCodes = $countries
->pluck('state_party_code')
->filter()
->values()
->toArray();

$countryNamesEn = $countries
->pluck('name_en')
->filter()
->values()
->toArray();

$countryNamesJp = $countries
->pluck('name_jp')
->filter()
->values()
->toArray();

$countryCount = $countries->count();

$country = null;
$countryNameJp = null;

if ($countryCount === 1) {
$country = $countryNamesEn[0] ?? null;
$countryNameJp = $countryNamesJp[0] ?? null;
}

$objects[] = [
'objectID' => (string)$row->id,
// for sorting in algolia
'id' => (int)$row->id,
'official_name' => (string)$row->official_name,
'name' => (string)$row->name,
'name_jp' => (string)$row->name_jp,
'country' => $row->country !== null ? (string)$row->country : null,
'country_name_jp' => $row->countries->first()?->name_jp,
'region' => (string)$row->region,
'category' => (string)$row->category,
'year_inscribed' => (int)$row->year_inscribed,
'is_endangered' => (bool)$row->is_endangered,
'thumbnail_url' => $row->image_url !== null ? (string)$row->image_url : null,
'country_names_jp' => $countryNamesJp,
'state_party_codes' => $statePartyCodes,
'objectID' => (string) $row->id,
'id' => (int) $row->id,
'official_name' => (string) $row->official_name,
'name' => (string) $row->name,
'name_jp' => (string) $row->name_jp,
'country' => $country,
'country_name_jp' => $countryNameJp,
'region' => (string) $row->region,
'study_region' => (string) $row->study_region,
'category' => (string) $row->category,
'year_inscribed' => $row->year_inscribed !== null ? (int) $row->year_inscribed : null,
'is_endangered' => (bool) $row->is_endangered,
'thumbnail_url' => $row->image_url !== null ? (string) $row->image_url : null,
'state_party_codes' => $countryCount > 1 ? $statePartyCodes : [],
'country_names_jp' => $countryCount > 1 ? $countryNamesJp : [],
];
}

if ($dryRun) {
if ($processed === 0 && isset($objects[0])) {
$this->line(json_encode($objects[0], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES));
}

$processed += count($objects);
return;
}
// Deliberately no dd() here: it halts the process, so the batch below would never reach saveObjects().

$res = $client->saveObjects(
indexName: $indexName,
Expand All @@ -123,10 +145,12 @@ public function handle(): int
if ($taskId !== null) {
$client->waitForTask($indexName, $taskId);
}

$processed += count($objects);
});

$this->info("Done: processed={$processed}");

return self::SUCCESS;
}
}
}
31 changes: 29 additions & 2 deletions src/app/Console/Commands/ImportCountriesFromSplitFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Config;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Storage;

Expand Down Expand Up @@ -58,13 +59,20 @@ public function handle(): int
if ($max > 0 && $imported >= $max) {
break;
}
if (!is_array($row)) { $skipped++; continue; }

if (!is_array($row)) {
$skipped++;
continue;
}

$code = strtoupper(trim((string) ($row['state_party_code'] ?? '')));
if ($code === '' || strlen($code) !== 3) {
$skipped++;
if ($strict) {
$this->error("Strict: invalid state_party_code: " . json_encode($row, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES));
$this->error(
'Strict: invalid state_party_code: ' .
json_encode($row, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES)
);
return self::FAILURE;
}
continue;
Expand All @@ -78,6 +86,15 @@ public function handle(): int
$nameEn = $code;
}

if ($nameJp === null) {
$nameJp = $this->resolveCountryNameJapanese($code);
}

if ($strict && $nameJp === null) {
$this->error("Strict: name_jp could not be resolved for state_party_code [{$code}]");
return self::FAILURE;
}

$batch[] = [
'state_party_code' => $code,
'name_en' => $nameEn,
Expand Down Expand Up @@ -164,6 +181,16 @@ private function toNullableString(mixed $v): ?string
}

$s = trim($v);

return $s === '' ? null : $s;
}

/**
 * Look up the Japanese country name configured for an ISO alpha-3 code.
 *
 * The code is trimmed and upper-cased before being appended to the
 * "country_ja.alpha3_to_country." config key.
 *
 * @param string $iso3 Alpha-3 code in any letter case, surrounding whitespace allowed.
 * @return string|null The configured name, or null when the config value is
 *                     not a string or is an empty string.
 */
private function resolveCountryNameJapanese(string $iso3): ?string
{
    $alpha3 = strtoupper(trim($iso3));
    $configured = Config::get('country_ja.alpha3_to_country.' . $alpha3);

    // Anything that is not a non-empty string counts as "not resolved".
    if (!is_string($configured) || $configured === '') {
        return null;
    }

    return $configured;
}
}
61 changes: 25 additions & 36 deletions src/app/Console/Commands/ImportWorldHeritageFromJson.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use App\Models\WorldHeritage;
use Carbon\Carbon;
use Illuminate\Console\Command;
use App\Support\StudyRegionResolver;

class ImportWorldHeritageFromJson extends Command
{
Expand Down Expand Up @@ -133,26 +134,17 @@ private function mapFromUnescoApiRow(array $row): array
$id = $row['id_no'] ?? null;
$lat = $row['coordinates']['lat'] ?? null;
$lon = $row['coordinates']['lon'] ?? null;
$criteriaRaw = $row['criteria_txt'] ?? $row['criteria'] ?? null;
$stateParty = $row['states'] ?? $row['state_party'] ?? null;
if (is_array($stateParty)) {
$stateParty = $stateParty[0] ?? null;
}

$stateParty = is_string($stateParty) ? strtoupper(trim($stateParty)) : null;
if ($stateParty === '') {
$stateParty = null;
}
if ($stateParty !== null && !preg_match('/^[A-Z]{3}$/', $stateParty)) {
$stateParty = null;
}
$countryName = $this->extractCountryName($row);
$statePartyIso3 = $this->extractIso3StateParty($row);

return [
'id' => $this->toNullableInt($id),
'official_name' => $row['official_name'] ?? null,
'name' => $row['name_en'] ?? $row['name'] ?? null,
'region' => $row['region_en'] ?? $row['region'] ?? null,
'state_party' => $stateParty,
'state_party' => $statePartyIso3,
'study_region' => StudyRegionResolver::resolve($countryName)->value,
'category' => $row['category'] ?? $row['type'] ?? null,
'criteria' => $row['criteria'] ?? null,
'year_inscribed' => $this->toNullableInt($row['date_inscribed'] ?? $row['year_inscribed'] ?? null),
Expand All @@ -168,29 +160,6 @@ private function mapFromUnescoApiRow(array $row): array
];
}

/**
 * Split a raw criteria string such as "(i)(ii)(iv)" into its individual tokens.
 *
 * Every parenthesised group becomes one trimmed token; groups that are blank
 * after trimming are discarded. When the text contains no parenthesised group,
 * the whole value, stripped of surrounding whitespace and parentheses, is
 * returned as a single token.
 *
 * @param mixed $raw Raw criteria value; anything other than null is cast to string.
 * @return list<string> Tokens in order of appearance; empty when nothing usable is found.
 */
private function criteriaFromTxt(mixed $raw): array
{
    if ($raw === null) {
        return [];
    }

    $text = trim((string) $raw);
    if ($text === '') {
        return [];
    }

    preg_match_all('/\(([^)]+)\)/', $text, $matches);
    $groups = $matches[1] ?? [];

    if ($groups !== []) {
        $tokens = array_map(
            static fn ($group): string => trim((string) $group),
            $groups,
        );

        // Compare against '' explicitly: a callback-less array_filter() would
        // also discard the token "0", which the bare-text branch below keeps.
        return array_values(array_filter(
            $tokens,
            static fn (string $token): bool => $token !== '',
        ));
    }

    // No "(...)" group at all: treat the remaining text as a single token.
    $bare = trim($text, " \t\n\r\0\x0B()");

    return $bare === '' ? [] : [$bare];
}

private function flushBatch(array $batch): int
{
$updateColumns = array_values(array_diff(array_keys($batch[0]), ['id']));
Expand Down Expand Up @@ -241,6 +210,26 @@ private function extractIso3StateParty(array $row): ?string
return null;
}

/**
 * Extract a single country name from a row.
 *
 * Reads $row['states'], falling back to $row['state_party']. A string value
 * is used directly; for an array only the element at index 0 is considered.
 *
 * @param array $row Source row.
 * @return string|null The trimmed name, or null when the value is missing,
 *                     not a string, or blank after trimming.
 */
private function extractCountryName(array $row): ?string
{
    $raw = $row['states'] ?? $row['state_party'] ?? null;

    // Arrays contribute only their first element; scalars are taken as-is.
    $candidate = is_array($raw) ? ($raw[0] ?? null) : $raw;

    if (!is_string($candidate)) {
        return null;
    }

    $name = trim($candidate);

    return $name === '' ? null : $name;
}

private function toNullableInt(mixed $v): ?int
{
if ($v === null || $v === '') {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public function handle(): int
'official_name' => $this->toNullableString($row['official_name'] ?? null),
'name' => $this->toNullableString($row['name'] ?? null),
'name_jp' => $this->toNullableString($row['name_jp'] ?? null),
'study_region' => $this->toNullableString($row['study_region'] ?? null),
'country' => $this->toNullableString($row['country'] ?? null),
'region' => $this->toNullableString($row['region'] ?? null),
'state_party' => $this->toNullableString($row['state_party'] ?? null),
Expand Down Expand Up @@ -200,23 +201,29 @@ private function toNullableFloat(mixed $v): ?float
/**
 * Normalise a loosely-typed boolean-like value to 1, 0 or null.
 *
 * - null and the empty string yield null.
 * - Booleans map to 1 / 0.
 * - Ints and floats yield 1 only when their integer cast equals 1, otherwise 0.
 * - Strings are trimmed and lower-cased, then matched against the accepted
 *   truthy ("1", "true", "t", "yes", "y", "on") and falsy
 *   ("0", "false", "f", "no", "n", "off") spellings.
 * - Anything else, including unrecognised strings, yields null.
 *
 * @param mixed $v Value to normalise.
 * @return int|null 1, 0, or null when the value cannot be interpreted.
 */
private function toNullableBoolInt(mixed $v): ?int
{
    if ($v === null || $v === '') {
        return null;
    }

    if (is_bool($v)) {
        return $v ? 1 : 0;
    }

    if (is_int($v) || is_float($v)) {
        return ((int) $v) === 1 ? 1 : 0;
    }

    if (!is_string($v)) {
        return null;
    }

    $token = strtolower(trim($v));

    return match (true) {
        in_array($token, ['1', 'true', 't', 'yes', 'y', 'on'], true) => 1,
        in_array($token, ['0', 'false', 'f', 'no', 'n', 'off'], true) => 0,
        default => null,
    };
}
}
Loading
Loading