rogaining_srv/migrate_location_to_location2025_with_validation.py

#!/usr/bin/env python
"""
LocationからLocation2025への完全データ移行スクリプト（統計検証付き）

機能：
- 全フィールド対応の完全データ移行
- リアルタイム統計検証
- データ品質チェック
- 移行前後の比較
- 詳細レポート生成
"""

import os
import django
from collections import defaultdict, Counter

# Django設定
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
django.setup()

from rog.models import Location, Location2025, NewEvent2
from django.contrib.auth import get_user_model
from django.contrib.gis.geos import Point

def analyze_source_data():
    """移行前のデータ分析"""
    print('=== 移行前データ分析 ===')

    total_locations = Location.objects.count()
    print(f'総Location件数: {total_locations}件')

    # グループ別統計
    with_group = Location.objects.exclude(group__isnull=True).exclude(group='').count()
    without_group = total_locations - with_group
    print(f'groupありLocation: {with_group}件')
    print(f'groupなしLocation: {without_group}件')

    # 座標データ統計
    with_geom = Location.objects.exclude(geom__isnull=True).count()
    with_lat_lng = Location.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count()
    print(f'geom座標あり: {with_geom}件')
    print(f'lat/lng座標あり: {with_lat_lng}件')

    # フィールド統計
    fields_stats = {}
    text_fields = ['photos', 'videos', 'remark', 'tags', 'evaluation_value', 'sub_loc_id', 'subcategory']
    numeric_fields = ['checkin_point', 'buy_point']
    boolean_fields = ['hidden_location']

    for field in text_fields:
        if hasattr(Location, field):
            count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: ''}).count()
            fields_stats[field] = count
            print(f'{field}データあり: {count}件')

    for field in numeric_fields:
        if hasattr(Location, field):
            count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: 0}).count()
            fields_stats[field] = count
            print(f'{field}データあり: {count}件')

    for field in boolean_fields:
        if hasattr(Location, field):
            count = Location.objects.filter(**{field: True}).count()
            fields_stats[field] = count
            print(f'{field}データあり: {count}件')

    return {
        'total': total_locations,
        'with_group': with_group,
        'without_group': without_group,
        'with_geom': with_geom,
        'with_lat_lng': with_lat_lng,
        'fields': fields_stats
    }

def validate_migration_data(source_stats):
    """移行後データ検証"""
    print('\n=== 移行後データ検証 ===')

    total_migrated = Location2025.objects.count()
    print(f'移行完了件数: {total_migrated}件')

    # フィールド検証
    migrated_stats = {}
    field_mapping = {
        'photos': 'photos',
        'videos': 'videos',
        'remark': 'remark',
        'tags': 'tags',
        'evaluation_value': 'evaluation_value',
        'hidden_location': 'hidden_location',
        'sub_loc_id': 'sub_loc_id',
        'subcategory': 'subcategory'
    }

    for source_field, target_field in field_mapping.items():
        if source_field == 'hidden_location':
            count = Location2025.objects.filter(**{target_field: True}).count()
        else:
            count = Location2025.objects.exclude(**{f'{target_field}__isnull': True}).exclude(**{target_field: ''}).count()
        migrated_stats[source_field] = count
        print(f'{target_field}データあり: {count}件')

    # 座標検証
    with_location = Location2025.objects.exclude(location__isnull=True).count()
    with_lat_lng = Location2025.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count()
    print(f'location座標あり: {with_location}件')
    print(f'lat/lng座標あり: {with_lat_lng}件')

    # 必須フィールド検証
    with_event = Location2025.objects.exclude(event__isnull=True).count()
    with_cp_name = Location2025.objects.exclude(cp_name__isnull=True).exclude(cp_name='').count()
    print(f'eventリンクあり: {with_event}件')
    print(f'cp_nameあり: {with_cp_name}件')

    return {
        'total': total_migrated,
        'fields': migrated_stats,
        'with_location': with_location,
        'with_lat_lng': with_lat_lng,
        'with_event': with_event,
        'with_cp_name': with_cp_name
    }

def generate_comparison_report(source_stats, migrated_stats):
    """移行前後比較レポート"""
    print('\n=== 移行前後比較レポート ===')

    print(f'総件数比較:')
    print(f'  移行前: {source_stats["total"]:,}件')
    print(f'  移行後: {migrated_stats["total"]:,}件')
    print(f'  移行率: {(migrated_stats["total"] / source_stats["total"] * 100):.1f}%')

    print(f'\nフィールド別データ保持率:')
    for field in source_stats['fields']:
        if field in migrated_stats['fields']:
            source_count = source_stats['fields'][field]
            migrated_count = migrated_stats['fields'][field]
            if source_count > 0:
                retention_rate = (migrated_count / source_count * 100)
                print(f'  {field}: {migrated_count:,}/{source_count:,}件 ({retention_rate:.1f}%)')
            else:
                print(f'  {field}: {migrated_count:,}/0件 (N/A)')

def analyze_event_distribution():
    """イベント別分布分析"""
    print('\n=== イベント別分布分析 ===')

    event_stats = {}
    for location in Location2025.objects.select_related('event'):
        event_name = location.event.event_name if location.event else 'No Event'
        event_code = location.event.event_code if location.event else 'No Code'
        key = f"{event_code} ({event_name})"
        event_stats[key] = event_stats.get(key, 0) + 1

    # 件数順でソート
    sorted_events = sorted(event_stats.items(), key=lambda x: x[1], reverse=True)

    print(f'総イベント数: {len(sorted_events)}件')
    print(f'上位イベント:')
    for i, (event_key, count) in enumerate(sorted_events[:10], 1):
        print(f'  {i:2d}. {event_key}: {count:,}件')

    return event_stats

def sample_data_verification():
    """サンプルデータ検証"""
    print('\n=== サンプルデータ検証 ===')

    # 各種データパターンのサンプルを取得
    samples = []

    # 写真データありのサンプル
    photo_sample = Location2025.objects.filter(photos__isnull=False).exclude(photos='').first()
    if photo_sample:
        samples.append(('写真データあり', photo_sample))

    # remarkデータありのサンプル
    remark_sample = Location2025.objects.filter(remark__isnull=False).exclude(remark='').first()
    if remark_sample:
        samples.append(('詳細説明あり', remark_sample))

    # 高ポイントのサンプル
    high_point_sample = Location2025.objects.filter(cp_point__gt=50).first()
    if high_point_sample:
        samples.append(('高ポイント', high_point_sample))

    # 通常サンプル
    if not samples:
        normal_sample = Location2025.objects.first()
        if normal_sample:
            samples.append(('通常データ', normal_sample))

    for sample_type, sample in samples[:3]:
        print(f'\n【{sample_type}サンプル】')
        print(f'  CP番号: {sample.cp_number}')
        print(f'  CP名: {sample.cp_name}')
        print(f'  CPポイント: {sample.cp_point}')
        print(f'  フォトポイント: {sample.photo_point}')
        print(f'  sub_loc_id: {sample.sub_loc_id}')
        print(f'  subcategory: {sample.subcategory}')

        # データ長を制限して表示
        def truncate_text(text, max_len=30):
            if not text:
                return '(空)'
            return text[:max_len] + '...' if len(text) > max_len else text

        print(f'  写真: {truncate_text(sample.photos)}')
        print(f'  動画: {truncate_text(sample.videos)}')
        print(f'  詳細: {truncate_text(sample.remark)}')
        print(f'  タグ: {truncate_text(sample.tags)}')
        print(f'  評価値: {truncate_text(sample.evaluation_value)}')
        print(f'  隠し: {sample.hidden_location}')
        print(f'  イベント: {sample.event.event_name if sample.event else "None"}')

def main():
    """メイン実行関数"""
    User = get_user_model()
    default_user = User.objects.first()

    print('='*60)
    print('Location → Location2025 完全移行スクリプト（統計検証付き）')
    print('='*60)

    # 1. 移行前データ分析
    source_stats = analyze_source_data()

    # 2. 既存Location2025データ削除
    print('\n=== 既存データクリア ===')
    deleted_count = Location2025.objects.count()
    Location2025.objects.all().delete()
    print(f'削除済み: {deleted_count}件')

    # 3. NewEvent2のevent_codeマップ作成
    print('\n=== Event Code マッピング ===')
    events = NewEvent2.objects.filter(event_code__isnull=False).exclude(event_code='')
    event_code_map = {}
    for event in events:
        event_code_map[event.event_code] = event
    print(f'有効なevent_code数: {len(event_code_map)}件')

    # 4. データ移行実行
    print('\n=== データ移行実行 ===')
    locations = Location.objects.all()
    processed_combinations = set()
    migrated_count = 0
    skipped_count = 0
    error_count = 0
    event_migration_stats = defaultdict(int)

    for location in locations:
        try:
            # groupが空の場合はスキップ
            if not location.group:
                skipped_count += 1
                continue

            # location.groupに含まれるevent_codeを検索
            matched_event = None
            matched_event_code = None

            for event_code, event in event_code_map.items():
                if event_code in location.group:
                    matched_event = event
                    matched_event_code = event_code
                    break

            # マッチするevent_codeがない場合はスキップ
            if not matched_event:
                skipped_count += 1
                continue

            # cp_number + event_idの組み合わせを確認
            combination_key = (location.cp, matched_event.id)
            if combination_key in processed_combinations:
                skipped_count += 1
                continue

            # この組み合わせを処理済みとしてマーク
            processed_combinations.add(combination_key)

            # MultiPointからPointに変換
            point_location = None
            if location.geom and len(location.geom) > 0:
                first_point = location.geom[0]
                point_location = Point(first_point.x, first_point.y)
            elif location.longitude and location.latitude:
                point_location = Point(location.longitude, location.latitude)

            # Location2025レコードを作成
            location2025, created = Location2025.objects.update_or_create(
                cp_number=location.cp,
                event=matched_event,
                defaults={
                    'cp_name': location.location_name or '',
                    'sub_loc_id': location.sub_loc_id or '',
                    'subcategory': location.subcategory or '',
                    'latitude': location.latitude or 0.0,
                    'longitude': location.longitude or 0.0,
                    'location': point_location,
                    'cp_point': int(location.checkin_point) if location.checkin_point else 0,
                    'photo_point': int(location.checkin_point) if location.checkin_point else 0,
                    'buy_point': int(location.buy_point) if location.buy_point else 0,
                    'checkin_radius': location.checkin_radius or 100.0,
                    'auto_checkin': location.auto_checkin or False,
                    'shop_closed': location.shop_closed or False,
                    'shop_shutdown': location.shop_shutdown or False,
                    'opening_hours': '',
                    'address': location.address or '',
                    'phone': location.phone or '',
                    'website': '',
                    'description': location.remark or '',
                    # 追加フィールド
                    'photos': location.photos or '',
                    'videos': location.videos or '',
                    'remark': location.remark or '',
                    'tags': location.tags or '',
                    'evaluation_value': location.evaluation_value or '',
                    'hidden_location': location.hidden_location or False,
                    # 管理情報
                    'is_active': True,
                    'sort_order': 0,
                    'csv_source_file': 'migration_from_location',
                    'created_by': default_user,
                    'updated_by': default_user,
                }
            )

            if created:
                migrated_count += 1
                event_migration_stats[matched_event_code] += 1

                if migrated_count % 100 == 0:
                    print(f'進捗: {migrated_count:,}件完了')

        except Exception as e:
            print(f'❌ エラー: CP {location.cp} - {str(e)}')
            error_count += 1

    # 5. 移行結果サマリー
    print(f'\n=== 移行結果サマリー ===')
    print(f'移行完了: {migrated_count:,}件')
    print(f'スキップ: {skipped_count:,}件')
    print(f'エラー: {error_count:,}件')
    print(f'総処理: {migrated_count + skipped_count + error_count:,}件')

    # 6. 移行後データ検証
    migrated_stats = validate_migration_data(source_stats)

    # 7. 比較レポート生成
    generate_comparison_report(source_stats, migrated_stats)

    # 8. イベント別分布分析
    event_distribution = analyze_event_distribution()

    # 9. サンプルデータ検証
    sample_data_verification()

    # 10. 最終検証サマリー
    print('\n' + '='*60)
    print('🎯 移行完了検証サマリー')
    print('='*60)

    success_rate = (migrated_count / source_stats['total'] * 100) if source_stats['total'] > 0 else 0
    print(f'✅ 総移行成功率: {success_rate:.1f}% ({migrated_count:,}/{source_stats["total"]:,}件)')
    print(f'✅ エラー率: {(error_count / source_stats["total"] * 100):.1f}% ({error_count:,}件)')
    print(f'✅ 最終Location2025件数: {Location2025.objects.count():,}件')
    print(f'✅ 対応イベント数: {len(event_distribution)}件')

    # データ品質スコア算出
    quality_score = 0
    if migrated_stats['with_event'] == migrated_stats['total']:
        quality_score += 25  # 全てにイベントがリンクされている
    if migrated_stats['with_cp_name'] >= migrated_stats['total'] * 0.95:
        quality_score += 25  # 95%以上にCP名がある
    if migrated_stats['fields']['photos'] >= migrated_stats['total'] * 0.8:
        quality_score += 25  # 80%以上に写真データがある
    if migrated_stats['fields']['remark'] >= migrated_stats['total'] * 0.8:
        quality_score += 25  # 80%以上に詳細説明がある

    print(f'✅ データ品質スコア: {quality_score}/100点')

    if quality_score >= 90:
        print('🏆 優秀：本格運用準備完了')
    elif quality_score >= 70:
        print('🥉 良好：運用可能レベル')
    elif quality_score >= 50:
        print('⚠️  要改善：一部データ補完推奨')
    else:
        print('❌ 要対応：データ品質に課題あり')

    print('\n✅ 全フィールド対応の完全データ移行が正常に完了しました')

if __name__ == "__main__":
    main()