# Pastebin bgstSdQ4
# example4-extrafiles.py - archive a single 8chan thread: save its JSON and
# download every file attached to its posts (including extra files).
from __future__ import print_function

import json
import os
import sys

import py8chan
import requests

# Size of each chunk written to disk while streaming a download.
CHUNK_SIZE_IN_BYTES = 1024 * 1024  # 1MB


def mkdirs(path):
    """Make directory (and any missing parents), if it doesn't exist.

    An empty path is ignored: ``os.path.dirname`` returns '' for a bare
    filename, and ``os.makedirs('')`` would raise.
    """
    if path and not os.path.exists(path):
        os.makedirs(path)


def download_file(local_filename, url, clobber=False):
    """Download ``url`` to ``local_filename``.

    The parent directory of ``local_filename`` is created when missing.
    An existing file is kept as-is unless ``clobber`` is true.

    Returns True when the file is present afterwards (freshly downloaded or
    already on disk), False when the server answered with an error status.
    Connection-level errors raised by ``requests`` propagate to the caller.
    """
    mkdirs(os.path.dirname(local_filename))

    # Nothing to do when the file is already there and may not be replaced.
    if not clobber and os.path.exists(local_filename):
        return True

    # stream=True defers fetching the body, so the chunked loop below really
    # writes piece by piece instead of holding the whole file in memory.
    response = requests.get(url, stream=True)
    try:
        # Any error status (404, 403, 500, ...) is a failed download; saving
        # the error page under the target filename would corrupt the archive.
        if not response.ok:
            print('Failed to download file:', local_filename, url)
            return False

        with open(local_filename, 'wb') as local_file:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE_IN_BYTES):
                local_file.write(chunk)
    finally:
        response.close()

    return True


def download_json(local_filename, url, clobber=False):
    """Download the given JSON file, then rewrite it pretty-printed.

    Returns True on success and False when the download failed (in which
    case nothing is rewritten). Raises ValueError when the downloaded
    content is not valid JSON; the raw file is left untouched in that case.
    """
    if not download_file(local_filename, url, clobber):
        return False

    # Parse what was actually saved; download_file only reports success, so
    # its return value must not be serialised in place of the document.
    with open(local_filename, 'rb') as json_file:
        original_data = json.loads(json_file.read().decode('utf-8'))

    # write reformatted json
    with open(local_filename, 'w') as json_file:
        json_file.write(json.dumps(original_data, sort_keys=True,
                                   indent=2, separators=(',', ': ')))
    return True


def main():
    """Archive the thread named on the command line: <board> <thread_id>."""
    # Exactly two arguments are required; both are read below.
    if len(sys.argv) != 3:
        print("Quick and dirty 8chan Archiver")
        print("%s - Save the JSON and all images for an 8chan post."
              % (sys.argv[0]))
        print("\tUsage: %s <board> <thread_id>" % (sys.argv[0]))
        sys.exit(1)

    board_name = sys.argv[1]
    thread_id = sys.argv[2]

    # look up the requested thread on the requested board
    board = py8chan.Board(board_name)
    thread = board.get_thread(thread_id)

    # create folders
    path = os.path.join(os.getcwd(), board_name, thread_id)
    images_path = os.path.join(path, "images")
    mkdirs(images_path)

    # archive the thread JSON
    json_url = "http://8ch.net/%s/res/%s.json" % (board_name, thread_id)
    download_json(os.path.join(path, "%s.json" % thread_id), json_url)

    # download every file in the thread, even extra files in posts
    for img_fname in thread.filenames():
        print("Downloading %s..." % img_fname)
        # Files live under the thread's own board, not a fixed one.
        download_file(os.path.join(images_path, "%s" % img_fname),
                      "http://8ch.net/%s/src/%s" % (board_name, img_fname))


if __name__ == '__main__':
    main()