kithkin/app/Jobs/GeocodeEventLocations.php

266 lines
9.9 KiB
PHP

<?php
namespace App\Jobs;
use App\Models\EventMeta;
use App\Models\Location;
use App\Services\Location\Geocoder;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Str;
use Illuminate\Support\Facades\Log;
/**
 * Queued job that fills in missing geographic data in two phases:
 *
 *  1. Location rows that have a raw address but no lat/lon are forward-geocoded
 *     and their empty fields are filled in.
 *  2. EventMeta rows that carry a free-form location string but no location_id
 *     are linked to a matching (or newly created) Location row.
 *
 * The optional limit caps the number of records handled across BOTH phases.
 *
 * Events whose location string cannot be resolved are left with a null
 * location_id, so they are selected again on the next run.
 */
class GeocodeEventLocations implements ShouldQueue
{
    use Dispatchable, Queueable, InteractsWithQueue, SerializesModels;

    /** Number of rows fetched per chunkById page. */
    private const CHUNK_SIZE = 200;

    /**
     * @param int|null $limit Process up to this many records this run
     *                        (locations and events combined); null = no cap.
     */
    public function __construct(
        public ?int $limit = null
    ) {}

    /**
     * Convenience helper: dispatch the job through the queue.
     */
    public static function push(?int $limit = null): void
    {
        dispatch(new static($limit));
    }

    /**
     * Convenience helper: run the job synchronously in the current process.
     */
    public static function runNow(?int $limit = null): void
    {
        dispatch_sync(new static($limit));
    }

    /**
     * Run both phases and log the resulting counters.
     *
     * For events, every processed row is counted in exactly one of
     * created / updated / skipped / failed, where "updated" means the event
     * was linked to a location row that already existed.
     */
    public function handle(Geocoder $geocoder): void
    {
        // working counters
        $stats = [
            'locations' => [
                'processed' => 0,
                'updated' => 0,
                'skipped' => 0,
                'failed' => 0,
            ],
            'events' => [
                'processed' => 0,
                'created' => 0,
                'updated' => 0,
                'skipped' => 0,
                'failed' => 0,
            ],
        ];
        $handled = 0;

        Log::info('GeocodeEventLocations: start', ['limit' => $this->limit]);

        // Phase 1; when the limit is exhausted there, phase 2 is not started.
        $limitHit = $this->geocodeLocations($geocoder, $stats, $handled);

        if (!$limitHit) {
            $this->linkEvents($geocoder, $stats, $handled);
        }

        Log::info('GeocodeEventLocations: done', $stats);
    }

    /**
     * Whether the configured cap has been reached for this run.
     */
    private function limitReached(int $handled): bool
    {
        return $this->limit !== null && $handled >= $this->limit;
    }

    /**
     * Phase 1: geocode location rows that have a raw address but lack lat or lon.
     *
     * @param array $stats   Counter array from handle(); 'locations' is updated in place.
     * @param int   $handled Running count of handled records; updated in place.
     *
     * @return bool True when iteration was cut short because the limit was reached.
     */
    private function geocodeLocations(Geocoder $geocoder, array &$stats, int &$handled): bool
    {
        $stopped = false;

        // No explicit orderBy: chunkById applies its own ordering on the key column.
        Location::query()
            ->whereNotNull('raw_address')
            ->where('raw_address', '<>', '')
            ->where(function ($q) {
                $q->whereNull('lat')->orWhereNull('lon');
            })
            ->chunkById(self::CHUNK_SIZE, function ($chunk) use ($geocoder, &$stats, &$handled, &$stopped) {
                foreach ($chunk as $loc) {
                    if ($this->limitReached($handled)) {
                        $stopped = true;

                        return false; // stop further chunking
                    }

                    $handled++;
                    $stats['locations']['processed']++;

                    try {
                        $norm = $geocoder->forward($loc->raw_address);

                        // Nothing usable came back: leave the row untouched.
                        if (!$norm || !is_numeric($norm['lat'] ?? null) || !is_numeric($norm['lon'] ?? null)) {
                            $stats['locations']['skipped']++;
                            continue;
                        }

                        $changed = false;

                        // Only fill values that are currently missing; never overwrite.
                        if ($loc->lat === null) {
                            $loc->lat = $norm['lat'];
                            $changed = true;
                        }
                        if ($loc->lon === null) {
                            $loc->lon = $norm['lon'];
                            $changed = true;
                        }
                        foreach (['street', 'city', 'state', 'postal', 'country'] as $field) {
                            if (empty($loc->{$field}) && !empty($norm[$field])) {
                                $loc->{$field} = $norm[$field];
                                $changed = true;
                            }
                        }

                        if ($changed) {
                            $loc->save();
                            $stats['locations']['updated']++;
                        } else {
                            $stats['locations']['skipped']++;
                        }
                    } catch (\Throwable $e) {
                        // Best effort: one bad row must not abort the whole run.
                        $stats['locations']['failed']++;
                        Log::warning('GeocodeEventLocations: location failed', [
                            'location_id' => $loc->id,
                            'raw_address' => $loc->raw_address,
                            'error' => $e->getMessage(),
                        ]);
                    }
                }
            }, 'id');

        return $stopped;
    }

    /**
     * Phase 2: link events that have a non-empty location string but no
     * linked location row yet.
     *
     * @param array $stats   Counter array from handle(); 'events' is updated in place.
     * @param int   $handled Running count of handled records; updated in place.
     */
    private function linkEvents(Geocoder $geocoder, array &$stats, int &$handled): void
    {
        // No explicit orderBy: chunkById applies its own ordering on event_id,
        // and keeping the query unordered means the count() below does not
        // carry a needless ORDER BY.
        $todo = EventMeta::query()
            ->whereNull('location_id')
            ->whereNotNull('location')
            ->where('location', '<>', '');

        // log total to process (before limit)
        $total = (clone $todo)->count();
        Log::info('[geo] starting GeocodeEventLocations', ['total' => $total, 'limit' => $this->limit]);

        $todo->chunkById(self::CHUNK_SIZE, function ($chunk) use ($geocoder, &$stats, &$handled) {
            foreach ($chunk as $meta) {
                // respect limit if provided
                if ($this->limitReached($handled)) {
                    return false; // stop further chunking
                }

                // Every event that reaches this point counts once, whatever the outcome.
                $handled++;
                $stats['events']['processed']++;

                try {
                    $outcome = $this->linkEvent($geocoder, $meta);
                    $stats['events'][$outcome]++;
                } catch (\Throwable $e) {
                    // Best effort: one bad row must not abort the whole run.
                    $stats['events']['failed']++;
                    Log::warning('GeocodeEventLocations: failed', [
                        'event_id' => $meta->event_id,
                        'location' => $meta->location,
                        'error' => $e->getMessage(),
                    ]);
                }
            }
        }, 'event_id');
    }

    /**
     * Resolve a single event's location string to a Location row and link it.
     *
     * @return string The counter to increment: 'created' (a new location row was
     *                inserted), 'updated' (linked to a pre-existing row) or
     *                'skipped' (the geocoder returned nothing usable).
     */
    private function linkEvent(Geocoder $geocoder, EventMeta $meta): string
    {
        $label = $meta->location;

        // Prefer an existing row whose display name or raw address equals the label.
        $exact = Location::where('display_name', $label)
            ->orWhere('raw_address', $label)
            ->first();

        // Otherwise accept a prefix match, but only when it is unambiguous.
        $location = $exact ?? $this->uniquePrefixMatch((string) $label);

        $query = $label;

        if ($location) {
            // if we already have coords, just link and move on
            if (is_numeric($location->lat) && is_numeric($location->lon)) {
                $meta->location_id = $location->id;
                $meta->save();

                return 'updated';
            }

            // The stored raw address is used as the geocoder query when present.
            if ($location->raw_address) {
                $query = $location->raw_address;
            }
        }

        $norm = $geocoder->forward($query);

        // is_numeric (not truthiness) so that a coordinate of 0 counts as present.
        $hasCoords = $norm
            && is_numeric($norm['lat'] ?? null)
            && is_numeric($norm['lon'] ?? null);

        // skip obvious non-address labels or unresolved queries
        if (!$norm || (!$hasCoords && empty($norm['street']))) {
            Log::info('GeocodeEventLocations: skipped', [
                'event_id' => $meta->event_id,
                'location' => $meta->location,
                'query' => $query,
            ]);

            return 'skipped';
        }

        // normalized match key to reduce duplicates
        $lookup = [
            'display_name' => $norm['display_name'] ?? null,
            'street' => $norm['street'] ?? null,
            'city' => $norm['city'] ?? null,
            'state' => $norm['state'] ?? null,
            'postal' => $norm['postal'] ?? null,
            'country' => $norm['country'] ?? null,
        ];

        // Match by normalized fields first, then fall back to the exact label
        // match found above (no need to run that query a second time).
        $existing = Location::where($lookup)->first() ?? $exact;

        // reuse existing location or create a new one with coords
        $loc = $existing ?? Location::firstOrCreate(
            $lookup,
            [
                'raw_address' => $norm['raw_address'] ?? null,
                'lat' => $norm['lat'] ?? null,
                'lon' => $norm['lon'] ?? null,
            ]
        );

        // Backfill a matched row that is missing coords, but only when the
        // geocoder actually produced a full pair; otherwise an existing lat or
        // lon would be overwritten with null.
        if ($existing && $hasCoords && ($existing->lat === null || $existing->lon === null)) {
            $existing->lat = $norm['lat'];
            $existing->lon = $norm['lon'];
            $existing->raw_address ??= $norm['raw_address'] ?? null;
            $existing->save();
        }

        // link event_meta → locations
        $meta->location_id = $loc->id;
        $meta->save();

        return $loc->wasRecentlyCreated ? 'created' : 'updated';
    }

    /**
     * Find the single location whose display name starts with the given label.
     *
     * LIKE wildcards inside the label are escaped so that a literal "%" or "_"
     * in an event's location string cannot widen the match. "!" is declared as
     * the escape character explicitly rather than relying on a database default.
     *
     * @return Location|null The match, or null when there are zero or several candidates.
     */
    private function uniquePrefixMatch(string $label): ?Location
    {
        $pattern = strtr($label, ['!' => '!!', '%' => '!%', '_' => '!_']) . '%';

        // Two rows are enough to tell "exactly one" from "more than one".
        $candidates = Location::whereRaw("display_name like ? escape '!'", [$pattern])
            ->limit(2)
            ->get();

        return $candidates->count() === 1 ? $candidates->first() : null;
    }
}