How to Save ValueInvestorsClub Ideas (Text & Images) with Python

⚡Python code to run in Google Colab.⚡

(When you run the code, the images and text are saved together in a folder and downloaded to your computer as a ZIP file.)


Sometimes you don’t just want to read an investment write-up — you want to study it deeply. Maybe you’d like to:

  • Download the text and images, then ask questions about it with an AI like ChatGPT
  • Cross-check the content with ChatGPT or other AI
  • Or, if English isn’t your first language (like me with Korean), translate and read it more comfortably in your native tongue

For any of those cases, having a clean offline copy of the analysis is a game-changer. This little Python script does exactly that.

What It Does

  • Logs into VIC using your browser session cookies
  • Pulls the main idea description
  • Saves any images locally (even if they’re Base64-encoded)
  • Prints a clean text-only version for easy reading

How to Use It

1. Copy Your VIC Cookies

  1. Open VIC in your browser, log in, then open the Developer Tools (F12).
  2. Go to the Application tab (if you don’t see it, click the >> icon to reveal hidden tabs).
  3. Find the Cookies section for valueinvestorsclub.com.
  4. Copy the following values:
  • vic_session (required)
  • cf_clearance (optional)
  • __cf_bm (optional, if it exists)

Paste them into the script here:

COOKIES_INPUT = {
    "vic_session": "paste_here",             # required
    "cf_clearance": "paste_here",            # optional
    "__cf_bm": "if_exists_paste_here",       # optional
}

2. Copy the post URL from VIC and paste it

URL = "article_url"

COOKIES_INPUT = {
    "vic_session": "paste_here",             # required
    "cf_clearance": "paste_here",            # optional
    "__cf_bm": "if_exists_paste_here",       # optional
}

Complete Python script

# Colab one-shot: Save all images + extract text + zip + auto-download

import re, os, base64, zipfile, mimetypes
import requests
from urllib.parse import quote, urljoin, urlparse
from bs4 import BeautifulSoup
from datetime import datetime
from google.colab import files  # Colab-only download

# ====== Settings ======
# Full URL of the VIC idea page to archive (replace the placeholder).
URL = "Type_Url_here"

# NOTE: These are private cookies — do not share them externally.
# Values come from the browser's DevTools (Application > Cookies) for
# valueinvestorsclub.com after logging in; empty entries are dropped
# before the session is built.
COOKIES_INPUT = {
    "vic_session": "paste_here",     # required
    "cf_clearance": "paste_if_exists",        # optional
    "__cf_bm": "paste_if_exists",             # optional
}

# ====== Utils ======
SAFE_SYMBOLS = "!#$%&'()*+,-./:;<=>?@[]^_`{|}~"
def sanitize_cookie_value(v: str) -> str:
    """Return *v* unchanged when it is latin-1 encodable, else percent-encode it.

    Cookie header values must survive a latin-1 round trip; anything outside
    that range (e.g. Korean text) is URL-quoted, keeping common cookie
    punctuation (SAFE_SYMBOLS) intact.
    """
    try:
        v.encode("latin-1")
    except UnicodeEncodeError:
        return quote(v, safe=SAFE_SYMBOLS)
    return v

def safe_name(s: str, fallback="download"):
    """Make *s* safe to use as a file or directory name.

    Runs of characters that are illegal in common filesystems are replaced
    with a single underscore, whitespace is collapsed to single spaces, and
    *fallback* is returned when nothing printable remains.
    """
    cleaned = re.sub(r"[\\/:*?\"<>|]+", "_", (s or "").strip())
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    return cleaned or fallback

def choose_ext_from_url_or_type(url_path: str, content_type: str | None):
    if content_type:
        main, _, sub = content_type.partition("/")
        if main == "image" and sub:
            sub = sub.split(";")[0].strip().lower()
            if sub == "jpeg": return ".jpg"
            if sub in {"jpg","png","gif","webp","bmp","tiff","svg+xml"}:
                return ".svg" if sub == "svg+xml" else f".{sub}"
    guess = os.path.splitext(urlparse(url_path).path)[1].lower()
    if guess in {".jpg",".jpeg",".png",".gif",".webp",".bmp",".tif",".tiff",".svg"}:
        return ".jpg" if guess == ".jpeg" else guess
    mt, _ = mimetypes.guess_type(url_path)
    if mt and mt.startswith("image/"):
        return ".jpg" if mt.endswith("jpeg") else f".{mt.split('/')[1]}"
    return ".bin"

# ====== Session/Cookies ======
# Drop empty cookie entries and make every value latin-1-safe before use.
cookies = {}
for name, value in COOKIES_INPUT.items():
    if value:
        cookies[name] = sanitize_cookie_value(value)

sess = requests.Session()
for name, value in cookies.items():
    # Scope each cookie to the VIC domain so it is sent on every request.
    sess.cookies.set(name, value, domain="valueinvestorsclub.com")

# Browser-like request headers; the Referer mirrors the page being fetched.
headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "en-US,en;q=0.9,ko;q=0.8",
    "Referer": URL,
}

# ====== Fetch page ======
# Download the idea page; raise_for_status aborts on HTTP errors (e.g. 403
# when the session cookies are missing or expired).
resp = sess.get(URL, headers=headers, timeout=30)
resp.raise_for_status()
html = resp.text

# Title / working folder
# Derive a filesystem-safe folder name from the page <title>, suffixed with
# a timestamp so repeated runs never collide.
soup_full = BeautifulSoup(html, "html.parser")
page_title = soup_full.title.get_text(strip=True) if soup_full.title else ""
# FIX: safe_name() substitutes its own default fallback ("download") for
# empty input, so the previous `safe_name(page_title) or "VIC_Idea"` could
# never yield "VIC_Idea". Pass the intended fallback through instead.
base_name = safe_name(page_title, fallback="VIC_Idea")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
work_dir = safe_name(f"{base_name}_{timestamp}")
os.makedirs(work_dir, exist_ok=True)

# ====== Extract main content section ======
# The idea body sits between <div id="idea_description_div"> and an HTML
# comment marking the end of the description tab-pane. Try the exact end
# marker first, then a looser variant for older page layouts.
strict_pat = re.compile(
    r'(<div\s+id=["\']idea_description_div["\'][^>]*>.*?<!--\s*tab-pane\s+description\s+end\s*-->)',
    re.DOTALL | re.IGNORECASE,
)
loose_pat = re.compile(
    r'(<div\s+id=["\']idea_description_div["\'][^>]*>.*?<!--.*?description.*?end.*?-->)',
    re.DOTALL | re.IGNORECASE,
)
m = strict_pat.search(html) or loose_pat.search(html)
if m is None:
    raise RuntimeError("Target section not found.")

frag_html = m.group(1)
soup = BeautifulSoup(frag_html, "html.parser")

# ====== Save text ======
# Flatten the fragment to newline-separated plain text and write it as UTF-8.
txt_path = os.path.join(work_dir, f"{safe_name(base_name)}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write(soup.get_text(separator="\n", strip=True))

# ====== Save images (data: URIs + external src) ======
# Walk every <img> in the extracted fragment. Inline base64 data-URIs are
# decoded directly; everything else is fetched over the authenticated
# session. Failures are collected rather than aborting the run.
img_dir = os.path.join(work_dir, "images")
os.makedirs(img_dir, exist_ok=True)
img_count, errors = 0, []

for tag in soup.find_all("img"):
    src = (tag.get("src") or "").strip()
    if not src:
        continue
    try:
        if src.startswith("data:image/") and ";base64," in src:
            # Inline image: infer the extension from the data-URI's MIME
            # subtype, then decode the base64 payload.
            mime = src.split(";")[0].split("/")[1]
            ext = ".jpg" if mime.lower() == "jpeg" else f".{mime.lower()}"
            payload = base64.b64decode(src.split(",", 1)[1])
            img_count += 1
            out_path = os.path.join(img_dir, f"image_{img_count:03d}{ext}")
            with open(out_path, "wb") as f:
                f.write(payload)
        else:
            # External image: resolve relative URLs against the page URL
            # and stream the bytes to disk in 8 KiB chunks.
            abs_url = urljoin(URL, src)
            r = sess.get(abs_url, headers=headers, timeout=30, stream=True)
            r.raise_for_status()
            ext = choose_ext_from_url_or_type(abs_url, r.headers.get("Content-Type", ""))
            img_count += 1
            out_path = os.path.join(img_dir, f"image_{img_count:03d}{ext}")
            with open(out_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except Exception as e:
        # Record the failure and keep going — one bad image should not
        # abort the whole archive.
        errors.append(f"{src} -> {e}")

# ====== Zip and trigger download ======
# Bundle the text file and every saved image under a single top-level
# folder inside the archive, then push the ZIP to the browser via Colab.
zip_path = f"{safe_name(base_name)}_{timestamp}.zip"
top = os.path.basename(work_dir)
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    # FIX: ZIP member names must use forward slashes (APPNOTE 4.4.17);
    # os.path.join would emit backslashes on Windows and produce archives
    # that other tools unpack with mangled names.
    zf.write(txt_path, arcname=f"{top}/{os.path.basename(txt_path)}")
    for fn in sorted(os.listdir(img_dir)):
        zf.write(os.path.join(img_dir, fn), arcname=f"{top}/images/{fn}")

# Summary of what was saved, plus any per-image failures.
print(f"Text saved to: {txt_path}")
print(f"Images saved: {img_count} file(s) -> {img_dir}")
if errors:
    print("Failed downloads:")
    for e in errors:
        print(" -", e)
print(f"ZIP created: {zip_path}")

# Colab download trigger
files.download(zip_path)

💡 Pro Tip: Your cookies are like passwords — keep them private. And of course, only scrape content you’re allowed to view.

이 글의 목차
    면책: 이 글은 개인적 의견을 담은 정보 제공용 기록이며 투자 조언 또는 투자 권유가 아닙니다.
    링크가 복사되었습니다.