#!/usr/bin/env python """ LocationからLocation2025への完全データ移行スクリプト(統計検証付き) 機能: - 全フィールド対応の完全データ移行 - リアルタイム統計検証 - データ品質チェック - 移行前後の比較 - 詳細レポート生成 """ import os import django from collections import defaultdict, Counter # Django設定 os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') django.setup() from rog.models import Location, Location2025, NewEvent2 from django.contrib.auth import get_user_model from django.contrib.gis.geos import Point def analyze_source_data(): """移行前のデータ分析""" print('=== 移行前データ分析 ===') total_locations = Location.objects.count() print(f'総Location件数: {total_locations}件') # グループ別統計 with_group = Location.objects.exclude(group__isnull=True).exclude(group='').count() without_group = total_locations - with_group print(f'groupありLocation: {with_group}件') print(f'groupなしLocation: {without_group}件') # 座標データ統計 with_geom = Location.objects.exclude(geom__isnull=True).count() with_lat_lng = Location.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count() print(f'geom座標あり: {with_geom}件') print(f'lat/lng座標あり: {with_lat_lng}件') # フィールド統計 fields_stats = {} text_fields = ['photos', 'videos', 'remark', 'tags', 'evaluation_value', 'sub_loc_id', 'subcategory'] numeric_fields = ['checkin_point', 'buy_point'] boolean_fields = ['hidden_location'] for field in text_fields: if hasattr(Location, field): count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: ''}).count() fields_stats[field] = count print(f'{field}データあり: {count}件') for field in numeric_fields: if hasattr(Location, field): count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: 0}).count() fields_stats[field] = count print(f'{field}データあり: {count}件') for field in boolean_fields: if hasattr(Location, field): count = Location.objects.filter(**{field: True}).count() fields_stats[field] = count print(f'{field}データあり: {count}件') return { 'total': total_locations, 'with_group': with_group, 'without_group': without_group, 'with_geom': with_geom, 'with_lat_lng': with_lat_lng, 'fields': fields_stats } def validate_migration_data(source_stats): """移行後データ検証""" print('\n=== 移行後データ検証 ===') total_migrated = Location2025.objects.count() print(f'移行完了件数: {total_migrated}件') # フィールド検証 migrated_stats = {} field_mapping = { 'photos': 'photos', 'videos': 'videos', 'remark': 'remark', 'tags': 'tags', 'evaluation_value': 'evaluation_value', 'hidden_location': 'hidden_location', 'sub_loc_id': 'sub_loc_id', 'subcategory': 'subcategory' } for source_field, target_field in field_mapping.items(): if source_field == 'hidden_location': count = Location2025.objects.filter(**{target_field: True}).count() else: count = Location2025.objects.exclude(**{f'{target_field}__isnull': True}).exclude(**{target_field: ''}).count() migrated_stats[source_field] = count print(f'{target_field}データあり: {count}件') # 座標検証 with_location = Location2025.objects.exclude(location__isnull=True).count() with_lat_lng = Location2025.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count() print(f'location座標あり: {with_location}件') print(f'lat/lng座標あり: {with_lat_lng}件') # 必須フィールド検証 with_event = Location2025.objects.exclude(event__isnull=True).count() with_cp_name = Location2025.objects.exclude(cp_name__isnull=True).exclude(cp_name='').count() print(f'eventリンクあり: {with_event}件') print(f'cp_nameあり: {with_cp_name}件') return { 'total': total_migrated, 'fields': migrated_stats, 'with_location': with_location, 'with_lat_lng': with_lat_lng, 'with_event': with_event, 'with_cp_name': with_cp_name } def generate_comparison_report(source_stats, migrated_stats): """移行前後比較レポート""" print('\n=== 移行前後比較レポート ===') print(f'総件数比較:') print(f' 移行前: {source_stats["total"]:,}件') print(f' 移行後: {migrated_stats["total"]:,}件') print(f' 移行率: {(migrated_stats["total"] / source_stats["total"] * 100):.1f}%') print(f'\nフィールド別データ保持率:') for field in source_stats['fields']: if field in migrated_stats['fields']: source_count = source_stats['fields'][field] migrated_count = migrated_stats['fields'][field] if source_count > 0: retention_rate = (migrated_count / source_count * 100) print(f' {field}: {migrated_count:,}/{source_count:,}件 ({retention_rate:.1f}%)') else: print(f' {field}: {migrated_count:,}/0件 (N/A)') def analyze_event_distribution(): """イベント別分布分析""" print('\n=== イベント別分布分析 ===') event_stats = {} for location in Location2025.objects.select_related('event'): event_name = location.event.event_name if location.event else 'No Event' event_code = location.event.event_code if location.event else 'No Code' key = f"{event_code} ({event_name})" event_stats[key] = event_stats.get(key, 0) + 1 # 件数順でソート sorted_events = sorted(event_stats.items(), key=lambda x: x[1], reverse=True) print(f'総イベント数: {len(sorted_events)}件') print(f'上位イベント:') for i, (event_key, count) in enumerate(sorted_events[:10], 1): print(f' {i:2d}. {event_key}: {count:,}件') return event_stats def sample_data_verification(): """サンプルデータ検証""" print('\n=== サンプルデータ検証 ===') # 各種データパターンのサンプルを取得 samples = [] # 写真データありのサンプル photo_sample = Location2025.objects.filter(photos__isnull=False).exclude(photos='').first() if photo_sample: samples.append(('写真データあり', photo_sample)) # remarkデータありのサンプル remark_sample = Location2025.objects.filter(remark__isnull=False).exclude(remark='').first() if remark_sample: samples.append(('詳細説明あり', remark_sample)) # 高ポイントのサンプル high_point_sample = Location2025.objects.filter(cp_point__gt=50).first() if high_point_sample: samples.append(('高ポイント', high_point_sample)) # 通常サンプル if not samples: normal_sample = Location2025.objects.first() if normal_sample: samples.append(('通常データ', normal_sample)) for sample_type, sample in samples[:3]: print(f'\n【{sample_type}サンプル】') print(f' CP番号: {sample.cp_number}') print(f' CP名: {sample.cp_name}') print(f' CPポイント: {sample.cp_point}') print(f' フォトポイント: {sample.photo_point}') print(f' sub_loc_id: {sample.sub_loc_id}') print(f' subcategory: {sample.subcategory}') # データ長を制限して表示 def truncate_text(text, max_len=30): if not text: return '(空)' return text[:max_len] + '...' if len(text) > max_len else text print(f' 写真: {truncate_text(sample.photos)}') print(f' 動画: {truncate_text(sample.videos)}') print(f' 詳細: {truncate_text(sample.remark)}') print(f' タグ: {truncate_text(sample.tags)}') print(f' 評価値: {truncate_text(sample.evaluation_value)}') print(f' 隠し: {sample.hidden_location}') print(f' イベント: {sample.event.event_name if sample.event else "None"}') def main(): """メイン実行関数""" User = get_user_model() default_user = User.objects.first() print('='*60) print('Location → Location2025 完全移行スクリプト(統計検証付き)') print('='*60) # 1. 移行前データ分析 source_stats = analyze_source_data() # 2. 既存Location2025データ削除 print('\n=== 既存データクリア ===') deleted_count = Location2025.objects.count() Location2025.objects.all().delete() print(f'削除済み: {deleted_count}件') # 3. NewEvent2のevent_codeマップ作成 print('\n=== Event Code マッピング ===') events = NewEvent2.objects.filter(event_code__isnull=False).exclude(event_code='') event_code_map = {} for event in events: event_code_map[event.event_code] = event print(f'有効なevent_code数: {len(event_code_map)}件') # 4. データ移行実行 print('\n=== データ移行実行 ===') locations = Location.objects.all() processed_combinations = set() migrated_count = 0 skipped_count = 0 error_count = 0 event_migration_stats = defaultdict(int) for location in locations: try: # groupが空の場合はスキップ if not location.group: skipped_count += 1 continue # location.groupに含まれるevent_codeを検索 matched_event = None matched_event_code = None for event_code, event in event_code_map.items(): if event_code in location.group: matched_event = event matched_event_code = event_code break # マッチするevent_codeがない場合はスキップ if not matched_event: skipped_count += 1 continue # cp_number + event_idの組み合わせを確認 combination_key = (location.cp, matched_event.id) if combination_key in processed_combinations: skipped_count += 1 continue # この組み合わせを処理済みとしてマーク processed_combinations.add(combination_key) # MultiPointからPointに変換 point_location = None if location.geom and len(location.geom) > 0: first_point = location.geom[0] point_location = Point(first_point.x, first_point.y) elif location.longitude and location.latitude: point_location = Point(location.longitude, location.latitude) # Location2025レコードを作成 location2025, created = Location2025.objects.update_or_create( cp_number=location.cp, event=matched_event, defaults={ 'cp_name': location.location_name or '', 'sub_loc_id': location.sub_loc_id or '', 'subcategory': location.subcategory or '', 'latitude': location.latitude or 0.0, 'longitude': location.longitude or 0.0, 'location': point_location, 'cp_point': int(location.checkin_point) if location.checkin_point else 0, 'photo_point': int(location.checkin_point) if location.checkin_point else 0, 'buy_point': int(location.buy_point) if location.buy_point else 0, 'checkin_radius': location.checkin_radius or 100.0, 'auto_checkin': location.auto_checkin or False, 'shop_closed': location.shop_closed or False, 'shop_shutdown': location.shop_shutdown or False, 'opening_hours': '', 'address': location.address or '', 'phone': location.phone or '', 'website': '', 'description': location.remark or '', # 追加フィールド 'photos': location.photos or '', 'videos': location.videos or '', 'remark': location.remark or '', 'tags': location.tags or '', 'evaluation_value': location.evaluation_value or '', 'hidden_location': location.hidden_location or False, # 管理情報 'is_active': True, 'sort_order': 0, 'csv_source_file': 'migration_from_location', 'created_by': default_user, 'updated_by': default_user, } ) if created: migrated_count += 1 event_migration_stats[matched_event_code] += 1 if migrated_count % 100 == 0: print(f'進捗: {migrated_count:,}件完了') except Exception as e: print(f'❌ エラー: CP {location.cp} - {str(e)}') error_count += 1 # 5. 移行結果サマリー print(f'\n=== 移行結果サマリー ===') print(f'移行完了: {migrated_count:,}件') print(f'スキップ: {skipped_count:,}件') print(f'エラー: {error_count:,}件') print(f'総処理: {migrated_count + skipped_count + error_count:,}件') # 6. 移行後データ検証 migrated_stats = validate_migration_data(source_stats) # 7. 比較レポート生成 generate_comparison_report(source_stats, migrated_stats) # 8. イベント別分布分析 event_distribution = analyze_event_distribution() # 9. サンプルデータ検証 sample_data_verification() # 10. 最終検証サマリー print('\n' + '='*60) print('🎯 移行完了検証サマリー') print('='*60) success_rate = (migrated_count / source_stats['total'] * 100) if source_stats['total'] > 0 else 0 print(f'✅ 総移行成功率: {success_rate:.1f}% ({migrated_count:,}/{source_stats["total"]:,}件)') print(f'✅ エラー率: {(error_count / source_stats["total"] * 100):.1f}% ({error_count:,}件)') print(f'✅ 最終Location2025件数: {Location2025.objects.count():,}件') print(f'✅ 対応イベント数: {len(event_distribution)}件') # データ品質スコア算出 quality_score = 0 if migrated_stats['with_event'] == migrated_stats['total']: quality_score += 25 # 全てにイベントがリンクされている if migrated_stats['with_cp_name'] >= migrated_stats['total'] * 0.95: quality_score += 25 # 95%以上にCP名がある if migrated_stats['fields']['photos'] >= migrated_stats['total'] * 0.8: quality_score += 25 # 80%以上に写真データがある if migrated_stats['fields']['remark'] >= migrated_stats['total'] * 0.8: quality_score += 25 # 80%以上に詳細説明がある print(f'✅ データ品質スコア: {quality_score}/100点') if quality_score >= 90: print('🏆 優秀:本格運用準備完了') elif quality_score >= 70: print('🥉 良好:運用可能レベル') elif quality_score >= 50: print('⚠️ 要改善:一部データ補完推奨') else: print('❌ 要対応:データ品質に課題あり') print('\n✅ 全フィールド対応の完全データ移行が正常に完了しました') if __name__ == "__main__": main()