Files
rogaining_srv/migrate_location_to_location2025_with_validation.py
2025-08-30 03:48:07 +09:00

398 lines
16 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
"""
LocationからLocation2025への完全データ移行スクリプト統計検証付き
機能:
- 全フィールド対応の完全データ移行
- リアルタイム統計検証
- データ品質チェック
- 移行前後の比較
- 詳細レポート生成
"""
import os
import django
from collections import defaultdict, Counter
# Django設定
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
django.setup()
from rog.models import Location, Location2025, NewEvent2
from django.contrib.auth import get_user_model
from django.contrib.gis.geos import Point
def analyze_source_data():
"""移行前のデータ分析"""
print('=== 移行前データ分析 ===')
total_locations = Location.objects.count()
print(f'総Location件数: {total_locations}')
# グループ別統計
with_group = Location.objects.exclude(group__isnull=True).exclude(group='').count()
without_group = total_locations - with_group
print(f'groupありLocation: {with_group}')
print(f'groupなしLocation: {without_group}')
# 座標データ統計
with_geom = Location.objects.exclude(geom__isnull=True).count()
with_lat_lng = Location.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count()
print(f'geom座標あり: {with_geom}')
print(f'lat/lng座標あり: {with_lat_lng}')
# フィールド統計
fields_stats = {}
text_fields = ['photos', 'videos', 'remark', 'tags', 'evaluation_value', 'sub_loc_id', 'subcategory']
numeric_fields = ['checkin_point', 'buy_point']
boolean_fields = ['hidden_location']
for field in text_fields:
if hasattr(Location, field):
count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: ''}).count()
fields_stats[field] = count
print(f'{field}データあり: {count}')
for field in numeric_fields:
if hasattr(Location, field):
count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: 0}).count()
fields_stats[field] = count
print(f'{field}データあり: {count}')
for field in boolean_fields:
if hasattr(Location, field):
count = Location.objects.filter(**{field: True}).count()
fields_stats[field] = count
print(f'{field}データあり: {count}')
return {
'total': total_locations,
'with_group': with_group,
'without_group': without_group,
'with_geom': with_geom,
'with_lat_lng': with_lat_lng,
'fields': fields_stats
}
def validate_migration_data(source_stats):
"""移行後データ検証"""
print('\n=== 移行後データ検証 ===')
total_migrated = Location2025.objects.count()
print(f'移行完了件数: {total_migrated}')
# フィールド検証
migrated_stats = {}
field_mapping = {
'photos': 'photos',
'videos': 'videos',
'remark': 'remark',
'tags': 'tags',
'evaluation_value': 'evaluation_value',
'hidden_location': 'hidden_location',
'sub_loc_id': 'sub_loc_id',
'subcategory': 'subcategory'
}
for source_field, target_field in field_mapping.items():
if source_field == 'hidden_location':
count = Location2025.objects.filter(**{target_field: True}).count()
else:
count = Location2025.objects.exclude(**{f'{target_field}__isnull': True}).exclude(**{target_field: ''}).count()
migrated_stats[source_field] = count
print(f'{target_field}データあり: {count}')
# 座標検証
with_location = Location2025.objects.exclude(location__isnull=True).count()
with_lat_lng = Location2025.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count()
print(f'location座標あり: {with_location}')
print(f'lat/lng座標あり: {with_lat_lng}')
# 必須フィールド検証
with_event = Location2025.objects.exclude(event__isnull=True).count()
with_cp_name = Location2025.objects.exclude(cp_name__isnull=True).exclude(cp_name='').count()
print(f'eventリンクあり: {with_event}')
print(f'cp_nameあり: {with_cp_name}')
return {
'total': total_migrated,
'fields': migrated_stats,
'with_location': with_location,
'with_lat_lng': with_lat_lng,
'with_event': with_event,
'with_cp_name': with_cp_name
}
def generate_comparison_report(source_stats, migrated_stats):
"""移行前後比較レポート"""
print('\n=== 移行前後比較レポート ===')
print(f'総件数比較:')
print(f' 移行前: {source_stats["total"]:,}')
print(f' 移行後: {migrated_stats["total"]:,}')
print(f' 移行率: {(migrated_stats["total"] / source_stats["total"] * 100):.1f}%')
print(f'\nフィールド別データ保持率:')
for field in source_stats['fields']:
if field in migrated_stats['fields']:
source_count = source_stats['fields'][field]
migrated_count = migrated_stats['fields'][field]
if source_count > 0:
retention_rate = (migrated_count / source_count * 100)
print(f' {field}: {migrated_count:,}/{source_count:,}件 ({retention_rate:.1f}%)')
else:
print(f' {field}: {migrated_count:,}/0件 (N/A)')
def analyze_event_distribution():
"""イベント別分布分析"""
print('\n=== イベント別分布分析 ===')
event_stats = {}
for location in Location2025.objects.select_related('event'):
event_name = location.event.event_name if location.event else 'No Event'
event_code = location.event.event_code if location.event else 'No Code'
key = f"{event_code} ({event_name})"
event_stats[key] = event_stats.get(key, 0) + 1
# 件数順でソート
sorted_events = sorted(event_stats.items(), key=lambda x: x[1], reverse=True)
print(f'総イベント数: {len(sorted_events)}')
print(f'上位イベント:')
for i, (event_key, count) in enumerate(sorted_events[:10], 1):
print(f' {i:2d}. {event_key}: {count:,}')
return event_stats
def sample_data_verification():
"""サンプルデータ検証"""
print('\n=== サンプルデータ検証 ===')
# 各種データパターンのサンプルを取得
samples = []
# 写真データありのサンプル
photo_sample = Location2025.objects.filter(photos__isnull=False).exclude(photos='').first()
if photo_sample:
samples.append(('写真データあり', photo_sample))
# remarkデータありのサンプル
remark_sample = Location2025.objects.filter(remark__isnull=False).exclude(remark='').first()
if remark_sample:
samples.append(('詳細説明あり', remark_sample))
# 高ポイントのサンプル
high_point_sample = Location2025.objects.filter(cp_point__gt=50).first()
if high_point_sample:
samples.append(('高ポイント', high_point_sample))
# 通常サンプル
if not samples:
normal_sample = Location2025.objects.first()
if normal_sample:
samples.append(('通常データ', normal_sample))
for sample_type, sample in samples[:3]:
print(f'\n{sample_type}サンプル】')
print(f' CP番号: {sample.cp_number}')
print(f' CP名: {sample.cp_name}')
print(f' CPポイント: {sample.cp_point}')
print(f' フォトポイント: {sample.photo_point}')
print(f' sub_loc_id: {sample.sub_loc_id}')
print(f' subcategory: {sample.subcategory}')
# データ長を制限して表示
def truncate_text(text, max_len=30):
if not text:
return '(空)'
return text[:max_len] + '...' if len(text) > max_len else text
print(f' 写真: {truncate_text(sample.photos)}')
print(f' 動画: {truncate_text(sample.videos)}')
print(f' 詳細: {truncate_text(sample.remark)}')
print(f' タグ: {truncate_text(sample.tags)}')
print(f' 評価値: {truncate_text(sample.evaluation_value)}')
print(f' 隠し: {sample.hidden_location}')
print(f' イベント: {sample.event.event_name if sample.event else "None"}')
def main():
"""メイン実行関数"""
User = get_user_model()
default_user = User.objects.first()
print('='*60)
print('Location → Location2025 完全移行スクリプト(統計検証付き)')
print('='*60)
# 1. 移行前データ分析
source_stats = analyze_source_data()
# 2. 既存Location2025データ削除
print('\n=== 既存データクリア ===')
deleted_count = Location2025.objects.count()
Location2025.objects.all().delete()
print(f'削除済み: {deleted_count}')
# 3. NewEvent2のevent_codeマップ作成
print('\n=== Event Code マッピング ===')
events = NewEvent2.objects.filter(event_code__isnull=False).exclude(event_code='')
event_code_map = {}
for event in events:
event_code_map[event.event_code] = event
print(f'有効なevent_code数: {len(event_code_map)}')
# 4. データ移行実行
print('\n=== データ移行実行 ===')
locations = Location.objects.all()
processed_combinations = set()
migrated_count = 0
skipped_count = 0
error_count = 0
event_migration_stats = defaultdict(int)
for location in locations:
try:
# groupが空の場合はスキップ
if not location.group:
skipped_count += 1
continue
# location.groupに含まれるevent_codeを検索
matched_event = None
matched_event_code = None
for event_code, event in event_code_map.items():
if event_code in location.group:
matched_event = event
matched_event_code = event_code
break
# マッチするevent_codeがない場合はスキップ
if not matched_event:
skipped_count += 1
continue
# cp_number + event_idの組み合わせを確認
combination_key = (location.cp, matched_event.id)
if combination_key in processed_combinations:
skipped_count += 1
continue
# この組み合わせを処理済みとしてマーク
processed_combinations.add(combination_key)
# MultiPointからPointに変換
point_location = None
if location.geom and len(location.geom) > 0:
first_point = location.geom[0]
point_location = Point(first_point.x, first_point.y)
elif location.longitude and location.latitude:
point_location = Point(location.longitude, location.latitude)
# Location2025レコードを作成
location2025, created = Location2025.objects.update_or_create(
cp_number=location.cp,
event=matched_event,
defaults={
'cp_name': location.location_name or '',
'sub_loc_id': location.sub_loc_id or '',
'subcategory': location.subcategory or '',
'latitude': location.latitude or 0.0,
'longitude': location.longitude or 0.0,
'location': point_location,
'cp_point': int(location.checkin_point) if location.checkin_point else 0,
'photo_point': int(location.checkin_point) if location.checkin_point else 0,
'buy_point': int(location.buy_point) if location.buy_point else 0,
'checkin_radius': location.checkin_radius or 100.0,
'auto_checkin': location.auto_checkin or False,
'shop_closed': location.shop_closed or False,
'shop_shutdown': location.shop_shutdown or False,
'opening_hours': '',
'address': location.address or '',
'phone': location.phone or '',
'website': '',
'description': location.remark or '',
# 追加フィールド
'photos': location.photos or '',
'videos': location.videos or '',
'remark': location.remark or '',
'tags': location.tags or '',
'evaluation_value': location.evaluation_value or '',
'hidden_location': location.hidden_location or False,
# 管理情報
'is_active': True,
'sort_order': 0,
'csv_source_file': 'migration_from_location',
'created_by': default_user,
'updated_by': default_user,
}
)
if created:
migrated_count += 1
event_migration_stats[matched_event_code] += 1
if migrated_count % 100 == 0:
print(f'進捗: {migrated_count:,}件完了')
except Exception as e:
print(f'❌ エラー: CP {location.cp} - {str(e)}')
error_count += 1
# 5. 移行結果サマリー
print(f'\n=== 移行結果サマリー ===')
print(f'移行完了: {migrated_count:,}')
print(f'スキップ: {skipped_count:,}')
print(f'エラー: {error_count:,}')
print(f'総処理: {migrated_count + skipped_count + error_count:,}')
# 6. 移行後データ検証
migrated_stats = validate_migration_data(source_stats)
# 7. 比較レポート生成
generate_comparison_report(source_stats, migrated_stats)
# 8. イベント別分布分析
event_distribution = analyze_event_distribution()
# 9. サンプルデータ検証
sample_data_verification()
# 10. 最終検証サマリー
print('\n' + '='*60)
print('🎯 移行完了検証サマリー')
print('='*60)
success_rate = (migrated_count / source_stats['total'] * 100) if source_stats['total'] > 0 else 0
print(f'✅ 総移行成功率: {success_rate:.1f}% ({migrated_count:,}/{source_stats["total"]:,}件)')
print(f'✅ エラー率: {(error_count / source_stats["total"] * 100):.1f}% ({error_count:,}件)')
print(f'✅ 最終Location2025件数: {Location2025.objects.count():,}')
print(f'✅ 対応イベント数: {len(event_distribution)}')
# データ品質スコア算出
quality_score = 0
if migrated_stats['with_event'] == migrated_stats['total']:
quality_score += 25 # 全てにイベントがリンクされている
if migrated_stats['with_cp_name'] >= migrated_stats['total'] * 0.95:
quality_score += 25 # 95%以上にCP名がある
if migrated_stats['fields']['photos'] >= migrated_stats['total'] * 0.8:
quality_score += 25 # 80%以上に写真データがある
if migrated_stats['fields']['remark'] >= migrated_stats['total'] * 0.8:
quality_score += 25 # 80%以上に詳細説明がある
print(f'✅ データ品質スコア: {quality_score}/100点')
if quality_score >= 90:
print('🏆 優秀:本格運用準備完了')
elif quality_score >= 70:
print('🥉 良好:運用可能レベル')
elif quality_score >= 50:
print('⚠️ 要改善:一部データ補完推奨')
else:
print('❌ 要対応:データ品質に課題あり')
print('\n✅ 全フィールド対応の完全データ移行が正常に完了しました')
if __name__ == "__main__":
main()