⚡Python code to run in Google Colab.⚡
(When you run the code, the images and text are saved together in a folder and downloaded to your computer as a ZIP file.)
Sometimes you don’t just want to read an investment write-up — you want to study it deeply. Maybe you’d like to:
- Download the text and images, then ask questions about it with an AI like ChatGPT
- Cross-check the content with ChatGPT or other AI
- Or, if English isn’t your first language (like me with Korean), translate and read it more comfortably in your native tongue
For any of those cases, having a clean offline copy of the analysis is a game-changer. This little Python script does exactly that.
What It Does
- Logs into VIC using your browser session cookies
- Pulls the main idea description
- Saves any images locally (even if they’re Base64-encoded)
- Prints a clean text-only version for easy reading
How to Use It
1. Copy Your VIC Cookies
- Open VIC in your browser, log in, then open the Developer Tools (F12).
- Go to the Application tab (if you don’t see it, click the >> icon to reveal hidden tabs), then find the Cookies section for valueinvestorsclub.com.
- Copy the following values: vic_session (required), cf_clearance (optional), __cf_bm (optional, if it exists).
Paste them into the script here:
COOKIES_INPUT = {
"vic_session": "paste_here", # required
"cf_clearance": "paste_here", # optional
"__cf_bm": "if_exists_paste_here", # optional
}
2. Copy the post URL from VIC and paste it
URL = "article_url"
COOKIES_INPUT = {
"vic_session": "paste_here", # required
"cf_clearance": "paste_here", # optional
"__cf_bm": "if_exists_paste_here", # optional
}
Complete Python script
# Colab one-shot: Save all images + extract text + zip + auto-download
import re, os, base64, zipfile, mimetypes
import requests
from urllib.parse import quote, urljoin, urlparse
from bs4 import BeautifulSoup
from datetime import datetime
from google.colab import files # Colab-only download
# ====== Settings ======
URL = "Type_Url_here"
# NOTE: These are private cookies — do not share them externally.
COOKIES_INPUT = {
"vic_session": "paste_here", # required
"cf_clearance": "paste_if_exists", # optional
"__cf_bm": "paste_if_exists", # optional
}
# ====== Utils ======
# Characters allowed to pass through unescaped when percent-encoding a cookie value.
SAFE_SYMBOLS = "!#$%&'()*+,-./:;<=>?@[]^_`{|}~"

def sanitize_cookie_value(v: str) -> str:
    """Return *v* unchanged when it is latin-1 encodable (what the cookie
    header requires); otherwise return it percent-encoded (UTF-8 based)."""
    try:
        v.encode("latin-1")
    except UnicodeEncodeError:
        # Non-latin-1 text (e.g. Korean) must be percent-encoded to be sendable.
        return quote(v, safe=SAFE_SYMBOLS)
    return v
def safe_name(s: str, fallback="download"):
    """Turn *s* into a filesystem-safe name.

    Replaces runs of characters that are illegal in Windows/Unix filenames
    with "_", collapses whitespace runs to single spaces, and returns
    *fallback* when nothing printable remains.
    """
    cleaned = re.sub(r"[\\/:*?\"<>|]+", "_", (s or "").strip())
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    return cleaned or fallback
def choose_ext_from_url_or_type(url_path: str, content_type: str | None):
if content_type:
main, _, sub = content_type.partition("/")
if main == "image" and sub:
sub = sub.split(";")[0].strip().lower()
if sub == "jpeg": return ".jpg"
if sub in {"jpg","png","gif","webp","bmp","tiff","svg+xml"}:
return ".svg" if sub == "svg+xml" else f".{sub}"
guess = os.path.splitext(urlparse(url_path).path)[1].lower()
if guess in {".jpg",".jpeg",".png",".gif",".webp",".bmp",".tif",".tiff",".svg"}:
return ".jpg" if guess == ".jpeg" else guess
mt, _ = mimetypes.guess_type(url_path)
if mt and mt.startswith("image/"):
return ".jpg" if mt.endswith("jpeg") else f".{mt.split('/')[1]}"
return ".bin"
# ====== Session/Cookies ======
# Build one authenticated session. Blank placeholders are skipped; values that
# aren't latin-1 encodable are percent-encoded so requests can send them.
sess = requests.Session()
cookies = {}
for name, raw in COOKIES_INPUT.items():
    if not raw:
        continue
    cookies[name] = sanitize_cookie_value(raw)
    sess.cookies.set(name, cookies[name], domain="valueinvestorsclub.com")

# Browser-like headers; the Referer makes the request look like in-site navigation.
headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "en-US,en;q=0.9,ko;q=0.8",
    "Referer": URL,
}
# ====== Fetch page ======
resp = sess.get(URL, headers=headers, timeout=30)
resp.raise_for_status()
html = resp.text

# Title / working folder
soup_full = BeautifulSoup(html, "html.parser")
page_title = soup_full.title.get_text(strip=True) if soup_full.title else ""
# BUG FIX: safe_name() already returns "download" for empty input, so the old
# `safe_name(page_title) or "VIC_Idea"` could never actually fall back to
# "VIC_Idea". Pass the intended fallback into safe_name() instead.
base_name = safe_name(page_title, "VIC_Idea")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Folder name is sanitized again because the timestamp join re-combines strings.
work_dir = safe_name(f"{base_name}_{timestamp}")
os.makedirs(work_dir, exist_ok=True)
# ====== Extract main content section ======
# Capture everything from the idea-description <div> up to the HTML comment
# that marks the end of the description tab pane. DOTALL lets "." span lines.
pat = re.compile(
    r'(<div\s+id=["\']idea_description_div["\'][^>]*>.*?<!--\s*tab-pane\s+description\s+end\s*-->)',
    re.DOTALL | re.IGNORECASE
)
m = pat.search(html)
if not m:
    # Fallback: the site sometimes varies the closing comment's exact wording,
    # so retry with a looser pattern ("...description...end...").
    pat2 = re.compile(
        r'(<div\s+id=["\']idea_description_div["\'][^>]*>.*?<!--.*?description.*?end.*?-->)',
        re.DOTALL | re.IGNORECASE
    )
    m = pat2.search(html)
if not m:
    raise RuntimeError("Target section not found.")
frag_html = m.group(1)
# Parse only the captured fragment; text/images below come from this soup.
soup = BeautifulSoup(frag_html, "html.parser")
# ====== Save text ======
# Write a plain-text rendering of the idea body (one line per HTML text node).
txt_path = os.path.join(work_dir, f"{safe_name(base_name)}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write(soup.get_text(separator="\n", strip=True))
# ====== Save images (data: URIs + external src) ======
img_dir = os.path.join(work_dir, "images")
os.makedirs(img_dir, exist_ok=True)
img_count, errors = 0, []
for img in soup.find_all("img"):
    src = (img.get("src") or "").strip()
    if not src:
        continue
    try:
        if src.startswith("data:image/") and ";base64," in src:
            # Inline image: decode the base64 payload embedded in the page.
            mime = src.split(";")[0].split("/")[1].lower()
            # BUG FIX: "image/svg+xml" previously produced the invalid
            # extension ".svg+xml"; map it to ".svg" like the URL branch does.
            if mime == "jpeg":
                ext = ".jpg"
            elif mime == "svg+xml":
                ext = ".svg"
            else:
                ext = f".{mime}"
            img_data = base64.b64decode(src.split(",", 1)[1])
            img_count += 1
            with open(os.path.join(img_dir, f"image_{img_count:03d}{ext}"), "wb") as f:
                f.write(img_data)
        else:
            # External image: download through the authenticated session and
            # stream to disk so large files don't sit in memory.
            abs_url = urljoin(URL, src)
            r = sess.get(abs_url, headers=headers, timeout=30, stream=True)
            r.raise_for_status()
            ext = choose_ext_from_url_or_type(abs_url, r.headers.get("Content-Type", ""))
            img_count += 1
            with open(os.path.join(img_dir, f"image_{img_count:03d}{ext}"), "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except Exception as e:
        # Best-effort scrape: record the failure and continue with the rest.
        errors.append(f"{src} -> {e}")
# ====== Zip and trigger download ======
# Archive layout: <work_dir>/<title>.txt and <work_dir>/images/<files>.
zip_path = f"{safe_name(base_name)}_{timestamp}.zip"
root = os.path.basename(work_dir)
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.write(txt_path, arcname=os.path.join(root, os.path.basename(txt_path)))
    for fn in sorted(os.listdir(img_dir)):
        zf.write(os.path.join(img_dir, fn), arcname=os.path.join(root, "images", fn))

# Summary for the notebook output.
print(f"Text saved to: {txt_path}")
print(f"Images saved: {img_count} file(s) -> {img_dir}")
if errors:
    print("Failed downloads:")
    for e in errors:
        print(" -", e)
print(f"ZIP created: {zip_path}")

# Colab download trigger
files.download(zip_path)
💡 Pro Tip: Your cookies are like passwords — keep them private. And of course, only scrape content you’re allowed to view.
