#!/usr/bin/env python3
"""Download the chat comments of a video from the public.openrec.tv API.

The comments are fetched page by page and written to two JSON files in the
current directory: a compact ``<video_id>.json`` and an indented
``<video_id>-formatted.json``.
"""
import sys
import datetime
import json
import random
import time

import requests


def _format_api_time(moment):
    """Return *moment* as a UTC timestamp string such as ``2020-01-01T00:00:00.000Z``."""
    as_utc = moment.astimezone(datetime.timezone.utc)
    # timespec='milliseconds' truncates (does not round) the microseconds, and
    # a UTC-aware value always carries the '+00:00' offset suffix.
    return as_utc.isoformat(timespec='milliseconds').replace('+00:00', 'Z')


def get_all(video_id, from_time=datetime.datetime.fromisoformat('2020-01-01T00:00:00+00:00'),
            *, timeout=30.0):
    """Fetch every chat comment of a video, paging forward in time.

    Each request asks for comments created at or after ``from_time``; the
    cursor is then advanced to the ``posted_at`` value of the last new comment
    in the page. Paging stops as soon as a page contains no comment that has
    not been seen before.

    Args:
        video_id: Identifier of the video, inserted into the request URL.
        from_time: Datetime to start fetching from. It is converted to UTC
            with ``astimezone``, so a naive value is interpreted as local time.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of the comment objects exactly as decoded from the API
        responses, in the order received and without duplicate ids.

    Raises:
        requests.HTTPError: If a response has an error status code.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    base_url = f'https://public.openrec.tv/external/api/v5/movies/{video_id}/chats'
    out = []
    known_ids = set()

    # The context manager closes the session's pooled connections on exit,
    # including when a request raises.
    with requests.Session() as sess:
        while True:
            r = sess.get(
                base_url,
                params={'from_created_at': _format_api_time(from_time),
                        'is_including_system_message': 'false'},
                timeout=timeout,
            )
            r.raise_for_status()

            # Consecutive pages overlap because the cursor is the timestamp of
            # the last comment already stored, so drop ids seen before.
            fresh = []
            for comment in r.json():
                comment_id = comment['id']
                if comment_id in known_ids:
                    continue
                known_ids.add(comment_id)
                fresh.append(comment)

            if not fresh:
                break
            out.extend(fresh)

            # NOTE(review): assumes 'posted_at' is an ISO 8601 string that
            # datetime.fromisoformat accepts - confirm against the API.
            new_from_time = datetime.datetime.fromisoformat(fresh[-1]['posted_at'])
            print(f'{from_time.isoformat()} - {new_from_time.isoformat()}, append {len(fresh)} comments')
            from_time = new_from_time

            # Random 1-2 second pause between requests.
            time.sleep(random.random() * 1 + 1)
    return out


def main():
    """Read a video id from argv or stdin, download its comments, and save them."""
    if len(sys.argv) > 1:
        video_id = sys.argv[1]
    else:
        video_id = input('Input video id (11-character alphanumeric string): ')

    # An empty id would build a malformed URL and write a file named '.json'.
    video_id = video_id.strip()
    if not video_id:
        sys.exit('No video id given')

    x = get_all(video_id)

    # Compact form, without whitespace between JSON tokens.
    with open(f'{video_id}.json', 'w', encoding='utf-8') as f:
        json.dump(x, f, ensure_ascii=False, separators=(',', ':'))
    # Human-readable form of the same data.
    with open(f'{video_id}-formatted.json', 'w', encoding='utf-8') as f:
        json.dump(x, f, ensure_ascii=False, indent=1)
    print('Comments saved')


if __name__ == '__main__':
    main()