🛠️ Multiprocessing
Multiprocessing lets you spawn separate OS processes that run in parallel across multiple CPU cores, so CPU-bound Python code can sidestep the Global Interpreter Lock (GIL).
Mastering this concept will significantly boost your Python data science skills!
💻 Code Example:
import multiprocessing as mp
import time
import os

# 1. Basic Process
def worker(task_id: int, result_queue: mp.Queue):
    """CPU-bound simulation: compress/hash pynfinity data."""
    pid = os.getpid()
    start = time.time()
    # Simulate heavy CPU work
    total = sum(i**2 for i in range(500_000))
    elapsed = time.time() - start
    result_queue.put({"task": task_id, "pid": pid,
                      "result": total, "time": round(elapsed, 3)})

# 2. Pool.map: parallel map over a list
def score_pynfinity_user(user_id: int) -> dict:
    score = sum(i % 7 for i in range(100_000)) % 100
    return {"user_id": user_id, "score": score, "pid": os.getpid()}

# 3. Manager worker: defined at module level so it can be pickled
#    under the "spawn" start method (the default on Windows/macOS).
def update_shared(idx, d, lst):
    d[f"key_{idx}"] = idx * 10
    lst.append(idx)

if __name__ == "__main__":
    # ── Queue-based multi-process ─────────────────────────────
    queue = mp.Queue()
    processes = [
        mp.Process(target=worker, args=(i, queue))
        for i in range(4)
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()

    # Payloads here are tiny, so draining after join() is safe;
    # with large items, drain the queue before joining to avoid deadlock.
    print("Results from Queue:")
    while not queue.empty():
        print(" ", queue.get())

    # ── Pool.map: simplest pattern ────────────────────────────
    with mp.Pool(processes=mp.cpu_count()) as pool:
        start = time.time()
        results = pool.map(score_pynfinity_user, range(20))
        elapsed = time.time() - start

    pids = {r["pid"] for r in results}
    print(f"\nPool.map: 20 tasks on {len(pids)} processes in {elapsed:.2f}s")
    for r in results[:5]:
        print(f"  User {r['user_id']:>2}: score={r['score']}, pid={r['pid']}")

    # ── Shared state with Manager ─────────────────────────────
    with mp.Manager() as manager:
        shared_dict = manager.dict()
        shared_list = manager.list()

        procs = [mp.Process(target=update_shared, args=(i, shared_dict, shared_list))
                 for i in range(5)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()

        print("\nShared dict:", dict(shared_dict))
        print("Shared list:", list(shared_list))
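One thing to note about Pool.map: it holds all results until the slowest worker finishes. When per-task times vary, Pool.imap_unordered streams each result back as soon as it completes. Here is a minimal sketch, assuming score_pynfinity_user is defined at module level as in the example above:

import multiprocessing as mp

if __name__ == "__main__":
    with mp.Pool() as pool:
        # Results arrive in completion order, not submission order,
        # so fast tasks are reported without waiting for slow ones.
        for r in pool.imap_unordered(score_pynfinity_user, range(20)):
            print(f"done: user {r['user_id']} (pid {r['pid']})")

Prefer map when you need results in input order, and imap_unordered when you want progress to show up as work finishes.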
Keep exploring and happy coding! 💻