How to Save ValueInvestorsClub Ideas (Text & Images) with Python

⚡Python code to run in Google Colab.⚡

(When you run the code, the images and text are saved together in a folder and downloaded to your computer as a ZIP file.)


Sometimes you don’t just want to read an investment write-up — you want to study it deeply. Maybe you’d like to:

  • Download the text and images, then ask questions about it with an AI like ChatGPT
  • Cross-check the content with ChatGPT or other AI
  • Or, if English isn’t your first language (like me with Korean), translate and read it more comfortably in your native tongue

For any of those cases, having a clean offline copy of the analysis is a game-changer. This little Python script does exactly that.

What It Does

  • Logs into VIC using your browser session cookies
  • Pulls the main idea description
  • Saves any images locally (even if they’re Base64-encoded)
  • Prints a clean text-only version for easy reading

How to Use It

1. Copy Your VIC Cookies

  1. Open VIC in your browser, log in, then open the Developer Tools (F12).
  2. Go to the Application tab (if you don’t see it, click the >> icon to reveal hidden tabs).
  3. Find the Cookies section for valueinvestorsclub.com.
  4. Copy the following values:
  • vic_session (required)
  • cf_clearance (optional)
  • __cf_bm (optional, if it exists)

Paste them into the script here:

COOKIES_INPUT = {
    "vic_session": "paste_here",             # required
    "cf_clearance": "paste_here",            # optional
    "__cf_bm": "if_exists_paste_here",       # optional
}

2. Copy the post URL from VIC and paste it

URL = "article_url"

COOKIES_INPUT = {
    "vic_session": "paste_here",             # required
    "cf_clearance": "paste_here",            # optional
    "__cf_bm": "if_exists_paste_here",       # optional
}

Complete Python script

# Colab one-shot: Save all images + extract text + zip + auto-download

import re, os, base64, zipfile, mimetypes
import requests
from urllib.parse import quote, urljoin, urlparse
from bs4 import BeautifulSoup
from datetime import datetime
from google.colab import files  # Colab-only download

# ====== Settings ======
# Full URL of the VIC idea page to archive (replace the placeholder).
URL = "Type_Url_here"

# NOTE: These are private cookies — do not share them externally.
# Values come from the browser's DevTools (Application > Cookies) for
# valueinvestorsclub.com after logging in; empty entries are dropped
# before the session is built.
COOKIES_INPUT = {
    "vic_session": "paste_here",     # required
    "cf_clearance": "paste_if_exists",        # optional
    "__cf_bm": "paste_if_exists",             # optional
}

# ====== Utils ======
SAFE_SYMBOLS = "!#$%&'()*+,-./:;<=>?@[]^_`{|}~"
def sanitize_cookie_value(v: str) -> str:
    """Return *v* unchanged when it is latin-1 encodable, else percent-encode it.

    Cookie header values must survive a latin-1 round trip; anything outside
    that range (e.g. Korean text) is URL-quoted, keeping common cookie
    punctuation (SAFE_SYMBOLS) intact.
    """
    try:
        v.encode("latin-1")
    except UnicodeEncodeError:
        return quote(v, safe=SAFE_SYMBOLS)
    return v

def safe_name(s: str, fallback="download"):
    """Make *s* safe to use as a file or directory name.

    Runs of characters that are illegal in common filesystems are replaced
    with a single underscore, whitespace is collapsed to single spaces, and
    *fallback* is returned when nothing printable remains.
    """
    cleaned = re.sub(r"[\\/:*?\"<>|]+", "_", (s or "").strip())
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    return cleaned or fallback

def choose_ext_from_url_or_type(url_path: str, content_type: str | None):
    if content_type:
        main, _, sub = content_type.partition("/")
        if main == "image" and sub:
            sub = sub.split(";")[0].strip().lower()
            if sub == "jpeg": return ".jpg"
            if sub in {"jpg","png","gif","webp","bmp","tiff","svg+xml"}:
                return ".svg" if sub == "svg+xml" else f".{sub}"
    guess = os.path.splitext(urlparse(url_path).path)[1].lower()
    if guess in {".jpg",".jpeg",".png",".gif",".webp",".bmp",".tif",".tiff",".svg"}:
        return ".jpg" if guess == ".jpeg" else guess
    mt, _ = mimetypes.guess_type(url_path)
    if mt and mt.startswith("image/"):
        return ".jpg" if mt.endswith("jpeg") else f".{mt.split('/')[1]}"
    return ".bin"

# ====== Session/Cookies ======
# Drop empty cookie entries and make every value latin-1-safe before use.
cookies = {}
for name, value in COOKIES_INPUT.items():
    if value:
        cookies[name] = sanitize_cookie_value(value)

sess = requests.Session()
for name, value in cookies.items():
    # Scope each cookie to the VIC domain so it is sent on every request.
    sess.cookies.set(name, value, domain="valueinvestorsclub.com")

# Browser-like request headers; the Referer mirrors the page being fetched.
headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "en-US,en;q=0.9,ko;q=0.8",
    "Referer": URL,
}

# ====== Fetch page ======
# Download the idea page; raise_for_status aborts on HTTP errors (e.g. 403
# when the session cookies are missing or expired).
resp = sess.get(URL, headers=headers, timeout=30)
resp.raise_for_status()
html = resp.text

# Title / working folder
# Derive a filesystem-safe folder name from the page <title>, suffixed with
# a timestamp so repeated runs never collide.
soup_full = BeautifulSoup(html, "html.parser")
page_title = soup_full.title.get_text(strip=True) if soup_full.title else ""
# FIX: safe_name() substitutes its own default fallback ("download") for
# empty input, so the previous `safe_name(page_title) or "VIC_Idea"` could
# never yield "VIC_Idea". Pass the intended fallback through instead.
base_name = safe_name(page_title, fallback="VIC_Idea")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
work_dir = safe_name(f"{base_name}_{timestamp}")
os.makedirs(work_dir, exist_ok=True)

# ====== Extract main content section ======
# The idea body sits between <div id="idea_description_div"> and an HTML
# comment marking the end of the description tab-pane. Try the exact end
# marker first, then a looser variant for older page layouts.
strict_pat = re.compile(
    r'(<div\s+id=["\']idea_description_div["\'][^>]*>.*?<!--\s*tab-pane\s+description\s+end\s*-->)',
    re.DOTALL | re.IGNORECASE,
)
loose_pat = re.compile(
    r'(<div\s+id=["\']idea_description_div["\'][^>]*>.*?<!--.*?description.*?end.*?-->)',
    re.DOTALL | re.IGNORECASE,
)
m = strict_pat.search(html) or loose_pat.search(html)
if m is None:
    raise RuntimeError("Target section not found.")

frag_html = m.group(1)
soup = BeautifulSoup(frag_html, "html.parser")

# ====== Save text ======
# Flatten the fragment to newline-separated plain text and write it as UTF-8.
txt_path = os.path.join(work_dir, f"{safe_name(base_name)}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write(soup.get_text(separator="\n", strip=True))

# ====== Save images (data: URIs + external src) ======
# Walk every <img> in the extracted fragment. Inline base64 data-URIs are
# decoded directly; everything else is fetched over the authenticated
# session. Failures are collected rather than aborting the run.
img_dir = os.path.join(work_dir, "images")
os.makedirs(img_dir, exist_ok=True)
img_count, errors = 0, []

for tag in soup.find_all("img"):
    src = (tag.get("src") or "").strip()
    if not src:
        continue
    try:
        if src.startswith("data:image/") and ";base64," in src:
            # Inline image: infer the extension from the data-URI's MIME
            # subtype, then decode the base64 payload.
            mime = src.split(";")[0].split("/")[1]
            ext = ".jpg" if mime.lower() == "jpeg" else f".{mime.lower()}"
            payload = base64.b64decode(src.split(",", 1)[1])
            img_count += 1
            out_path = os.path.join(img_dir, f"image_{img_count:03d}{ext}")
            with open(out_path, "wb") as f:
                f.write(payload)
        else:
            # External image: resolve relative URLs against the page URL
            # and stream the bytes to disk in 8 KiB chunks.
            abs_url = urljoin(URL, src)
            r = sess.get(abs_url, headers=headers, timeout=30, stream=True)
            r.raise_for_status()
            ext = choose_ext_from_url_or_type(abs_url, r.headers.get("Content-Type", ""))
            img_count += 1
            out_path = os.path.join(img_dir, f"image_{img_count:03d}{ext}")
            with open(out_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except Exception as e:
        # Record the failure and keep going — one bad image should not
        # abort the whole archive.
        errors.append(f"{src} -> {e}")

# ====== Zip and trigger download ======
# Bundle the text file and every saved image under a single top-level
# folder inside the archive, then push the ZIP to the browser via Colab.
zip_path = f"{safe_name(base_name)}_{timestamp}.zip"
top = os.path.basename(work_dir)
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    # FIX: ZIP member names must use forward slashes (APPNOTE 4.4.17);
    # os.path.join would emit backslashes on Windows and produce archives
    # that other tools unpack with mangled names.
    zf.write(txt_path, arcname=f"{top}/{os.path.basename(txt_path)}")
    for fn in sorted(os.listdir(img_dir)):
        zf.write(os.path.join(img_dir, fn), arcname=f"{top}/images/{fn}")

# Summary of what was saved, plus any per-image failures.
print(f"Text saved to: {txt_path}")
print(f"Images saved: {img_count} file(s) -> {img_dir}")
if errors:
    print("Failed downloads:")
    for e in errors:
        print(" -", e)
print(f"ZIP created: {zip_path}")

# Colab download trigger
files.download(zip_path)

💡 Pro Tip: Your cookies are like passwords — keep them private. And of course, only scrape content you’re allowed to view.

이 글의 목차
    면책: 이 글은 개인적 의견을 담은 정보 제공용 기록이며 투자 조언 또는 투자 권유가 아닙니다.
    링크가 복사되었습니다.