add location migrate

This commit is contained in:
2025-08-30 03:48:07 +09:00
parent cf0adb34f9
commit 596b7313dd
11 changed files with 1021 additions and 3 deletions

View File

@ -0,0 +1,397 @@
#!/usr/bin/env python
"""
LocationからLocation2025への完全データ移行スクリプト統計検証付き
機能:
- 全フィールド対応の完全データ移行
- リアルタイム統計検証
- データ品質チェック
- 移行前後の比較
- 詳細レポート生成
"""
import os
import django
from collections import defaultdict, Counter
# Django設定
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
django.setup()
from rog.models import Location, Location2025, NewEvent2
from django.contrib.auth import get_user_model
from django.contrib.gis.geos import Point
def analyze_source_data():
"""移行前のデータ分析"""
print('=== 移行前データ分析 ===')
total_locations = Location.objects.count()
print(f'総Location件数: {total_locations}')
# グループ別統計
with_group = Location.objects.exclude(group__isnull=True).exclude(group='').count()
without_group = total_locations - with_group
print(f'groupありLocation: {with_group}')
print(f'groupなしLocation: {without_group}')
# 座標データ統計
with_geom = Location.objects.exclude(geom__isnull=True).count()
with_lat_lng = Location.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count()
print(f'geom座標あり: {with_geom}')
print(f'lat/lng座標あり: {with_lat_lng}')
# フィールド統計
fields_stats = {}
text_fields = ['photos', 'videos', 'remark', 'tags', 'evaluation_value', 'sub_loc_id', 'subcategory']
numeric_fields = ['checkin_point', 'buy_point']
boolean_fields = ['hidden_location']
for field in text_fields:
if hasattr(Location, field):
count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: ''}).count()
fields_stats[field] = count
print(f'{field}データあり: {count}')
for field in numeric_fields:
if hasattr(Location, field):
count = Location.objects.exclude(**{f'{field}__isnull': True}).exclude(**{field: 0}).count()
fields_stats[field] = count
print(f'{field}データあり: {count}')
for field in boolean_fields:
if hasattr(Location, field):
count = Location.objects.filter(**{field: True}).count()
fields_stats[field] = count
print(f'{field}データあり: {count}')
return {
'total': total_locations,
'with_group': with_group,
'without_group': without_group,
'with_geom': with_geom,
'with_lat_lng': with_lat_lng,
'fields': fields_stats
}
def validate_migration_data(source_stats):
"""移行後データ検証"""
print('\n=== 移行後データ検証 ===')
total_migrated = Location2025.objects.count()
print(f'移行完了件数: {total_migrated}')
# フィールド検証
migrated_stats = {}
field_mapping = {
'photos': 'photos',
'videos': 'videos',
'remark': 'remark',
'tags': 'tags',
'evaluation_value': 'evaluation_value',
'hidden_location': 'hidden_location',
'sub_loc_id': 'sub_loc_id',
'subcategory': 'subcategory'
}
for source_field, target_field in field_mapping.items():
if source_field == 'hidden_location':
count = Location2025.objects.filter(**{target_field: True}).count()
else:
count = Location2025.objects.exclude(**{f'{target_field}__isnull': True}).exclude(**{target_field: ''}).count()
migrated_stats[source_field] = count
print(f'{target_field}データあり: {count}')
# 座標検証
with_location = Location2025.objects.exclude(location__isnull=True).count()
with_lat_lng = Location2025.objects.exclude(longitude__isnull=True).exclude(latitude__isnull=True).count()
print(f'location座標あり: {with_location}')
print(f'lat/lng座標あり: {with_lat_lng}')
# 必須フィールド検証
with_event = Location2025.objects.exclude(event__isnull=True).count()
with_cp_name = Location2025.objects.exclude(cp_name__isnull=True).exclude(cp_name='').count()
print(f'eventリンクあり: {with_event}')
print(f'cp_nameあり: {with_cp_name}')
return {
'total': total_migrated,
'fields': migrated_stats,
'with_location': with_location,
'with_lat_lng': with_lat_lng,
'with_event': with_event,
'with_cp_name': with_cp_name
}
def generate_comparison_report(source_stats, migrated_stats):
"""移行前後比較レポート"""
print('\n=== 移行前後比較レポート ===')
print(f'総件数比較:')
print(f' 移行前: {source_stats["total"]:,}')
print(f' 移行後: {migrated_stats["total"]:,}')
print(f' 移行率: {(migrated_stats["total"] / source_stats["total"] * 100):.1f}%')
print(f'\nフィールド別データ保持率:')
for field in source_stats['fields']:
if field in migrated_stats['fields']:
source_count = source_stats['fields'][field]
migrated_count = migrated_stats['fields'][field]
if source_count > 0:
retention_rate = (migrated_count / source_count * 100)
print(f' {field}: {migrated_count:,}/{source_count:,}件 ({retention_rate:.1f}%)')
else:
print(f' {field}: {migrated_count:,}/0件 (N/A)')
def analyze_event_distribution():
"""イベント別分布分析"""
print('\n=== イベント別分布分析 ===')
event_stats = {}
for location in Location2025.objects.select_related('event'):
event_name = location.event.event_name if location.event else 'No Event'
event_code = location.event.event_code if location.event else 'No Code'
key = f"{event_code} ({event_name})"
event_stats[key] = event_stats.get(key, 0) + 1
# 件数順でソート
sorted_events = sorted(event_stats.items(), key=lambda x: x[1], reverse=True)
print(f'総イベント数: {len(sorted_events)}')
print(f'上位イベント:')
for i, (event_key, count) in enumerate(sorted_events[:10], 1):
print(f' {i:2d}. {event_key}: {count:,}')
return event_stats
def sample_data_verification():
"""サンプルデータ検証"""
print('\n=== サンプルデータ検証 ===')
# 各種データパターンのサンプルを取得
samples = []
# 写真データありのサンプル
photo_sample = Location2025.objects.filter(photos__isnull=False).exclude(photos='').first()
if photo_sample:
samples.append(('写真データあり', photo_sample))
# remarkデータありのサンプル
remark_sample = Location2025.objects.filter(remark__isnull=False).exclude(remark='').first()
if remark_sample:
samples.append(('詳細説明あり', remark_sample))
# 高ポイントのサンプル
high_point_sample = Location2025.objects.filter(cp_point__gt=50).first()
if high_point_sample:
samples.append(('高ポイント', high_point_sample))
# 通常サンプル
if not samples:
normal_sample = Location2025.objects.first()
if normal_sample:
samples.append(('通常データ', normal_sample))
for sample_type, sample in samples[:3]:
print(f'\n{sample_type}サンプル】')
print(f' CP番号: {sample.cp_number}')
print(f' CP名: {sample.cp_name}')
print(f' CPポイント: {sample.cp_point}')
print(f' フォトポイント: {sample.photo_point}')
print(f' sub_loc_id: {sample.sub_loc_id}')
print(f' subcategory: {sample.subcategory}')
# データ長を制限して表示
def truncate_text(text, max_len=30):
if not text:
return '(空)'
return text[:max_len] + '...' if len(text) > max_len else text
print(f' 写真: {truncate_text(sample.photos)}')
print(f' 動画: {truncate_text(sample.videos)}')
print(f' 詳細: {truncate_text(sample.remark)}')
print(f' タグ: {truncate_text(sample.tags)}')
print(f' 評価値: {truncate_text(sample.evaluation_value)}')
print(f' 隠し: {sample.hidden_location}')
print(f' イベント: {sample.event.event_name if sample.event else "None"}')
def main():
"""メイン実行関数"""
User = get_user_model()
default_user = User.objects.first()
print('='*60)
print('Location → Location2025 完全移行スクリプト(統計検証付き)')
print('='*60)
# 1. 移行前データ分析
source_stats = analyze_source_data()
# 2. 既存Location2025データ削除
print('\n=== 既存データクリア ===')
deleted_count = Location2025.objects.count()
Location2025.objects.all().delete()
print(f'削除済み: {deleted_count}')
# 3. NewEvent2のevent_codeマップ作成
print('\n=== Event Code マッピング ===')
events = NewEvent2.objects.filter(event_code__isnull=False).exclude(event_code='')
event_code_map = {}
for event in events:
event_code_map[event.event_code] = event
print(f'有効なevent_code数: {len(event_code_map)}')
# 4. データ移行実行
print('\n=== データ移行実行 ===')
locations = Location.objects.all()
processed_combinations = set()
migrated_count = 0
skipped_count = 0
error_count = 0
event_migration_stats = defaultdict(int)
for location in locations:
try:
# groupが空の場合はスキップ
if not location.group:
skipped_count += 1
continue
# location.groupに含まれるevent_codeを検索
matched_event = None
matched_event_code = None
for event_code, event in event_code_map.items():
if event_code in location.group:
matched_event = event
matched_event_code = event_code
break
# マッチするevent_codeがない場合はスキップ
if not matched_event:
skipped_count += 1
continue
# cp_number + event_idの組み合わせを確認
combination_key = (location.cp, matched_event.id)
if combination_key in processed_combinations:
skipped_count += 1
continue
# この組み合わせを処理済みとしてマーク
processed_combinations.add(combination_key)
# MultiPointからPointに変換
point_location = None
if location.geom and len(location.geom) > 0:
first_point = location.geom[0]
point_location = Point(first_point.x, first_point.y)
elif location.longitude and location.latitude:
point_location = Point(location.longitude, location.latitude)
# Location2025レコードを作成
location2025, created = Location2025.objects.update_or_create(
cp_number=location.cp,
event=matched_event,
defaults={
'cp_name': location.location_name or '',
'sub_loc_id': location.sub_loc_id or '',
'subcategory': location.subcategory or '',
'latitude': location.latitude or 0.0,
'longitude': location.longitude or 0.0,
'location': point_location,
'cp_point': int(location.checkin_point) if location.checkin_point else 0,
'photo_point': int(location.checkin_point) if location.checkin_point else 0,
'buy_point': int(location.buy_point) if location.buy_point else 0,
'checkin_radius': location.checkin_radius or 100.0,
'auto_checkin': location.auto_checkin or False,
'shop_closed': location.shop_closed or False,
'shop_shutdown': location.shop_shutdown or False,
'opening_hours': '',
'address': location.address or '',
'phone': location.phone or '',
'website': '',
'description': location.remark or '',
# 追加フィールド
'photos': location.photos or '',
'videos': location.videos or '',
'remark': location.remark or '',
'tags': location.tags or '',
'evaluation_value': location.evaluation_value or '',
'hidden_location': location.hidden_location or False,
# 管理情報
'is_active': True,
'sort_order': 0,
'csv_source_file': 'migration_from_location',
'created_by': default_user,
'updated_by': default_user,
}
)
if created:
migrated_count += 1
event_migration_stats[matched_event_code] += 1
if migrated_count % 100 == 0:
print(f'進捗: {migrated_count:,}件完了')
except Exception as e:
print(f'❌ エラー: CP {location.cp} - {str(e)}')
error_count += 1
# 5. 移行結果サマリー
print(f'\n=== 移行結果サマリー ===')
print(f'移行完了: {migrated_count:,}')
print(f'スキップ: {skipped_count:,}')
print(f'エラー: {error_count:,}')
print(f'総処理: {migrated_count + skipped_count + error_count:,}')
# 6. 移行後データ検証
migrated_stats = validate_migration_data(source_stats)
# 7. 比較レポート生成
generate_comparison_report(source_stats, migrated_stats)
# 8. イベント別分布分析
event_distribution = analyze_event_distribution()
# 9. サンプルデータ検証
sample_data_verification()
# 10. 最終検証サマリー
print('\n' + '='*60)
print('🎯 移行完了検証サマリー')
print('='*60)
success_rate = (migrated_count / source_stats['total'] * 100) if source_stats['total'] > 0 else 0
print(f'✅ 総移行成功率: {success_rate:.1f}% ({migrated_count:,}/{source_stats["total"]:,}件)')
print(f'✅ エラー率: {(error_count / source_stats["total"] * 100):.1f}% ({error_count:,}件)')
print(f'✅ 最終Location2025件数: {Location2025.objects.count():,}')
print(f'✅ 対応イベント数: {len(event_distribution)}')
# データ品質スコア算出
quality_score = 0
if migrated_stats['with_event'] == migrated_stats['total']:
quality_score += 25 # 全てにイベントがリンクされている
if migrated_stats['with_cp_name'] >= migrated_stats['total'] * 0.95:
quality_score += 25 # 95%以上にCP名がある
if migrated_stats['fields']['photos'] >= migrated_stats['total'] * 0.8:
quality_score += 25 # 80%以上に写真データがある
if migrated_stats['fields']['remark'] >= migrated_stats['total'] * 0.8:
quality_score += 25 # 80%以上に詳細説明がある
print(f'✅ データ品質スコア: {quality_score}/100点')
if quality_score >= 90:
print('🏆 優秀:本格運用準備完了')
elif quality_score >= 70:
print('🥉 良好:運用可能レベル')
elif quality_score >= 50:
print('⚠️ 要改善:一部データ補完推奨')
else:
print('❌ 要対応:データ品質に課題あり')
print('\n✅ 全フィールド対応の完全データ移行が正常に完了しました')
if __name__ == "__main__":
main()