Skip to main content
Photo of DeepakNess DeepakNess

Generating missing meta descriptions using AI

Unproofread notes

On deepakness.com, I had 100+ blog posts without meta descriptions because, for some reason, the descriptions didn't carry over when I migrated my site from WordPress to 11ty. Manually adding a "description" property to the frontmatter of every one of those .md blog post files wasn't practical, so I created this Python script that does it automatically.

The script checks each markdown file in the /blog folder one by one. If the "description" property already exists, it skips the file; if there's no "description", it reads the blog post, sends a prompt (containing the title and the first 2,000 characters of the post) to OpenAI's GPT-5, and then writes the generated "description" into the frontmatter.

Here's the entire script:

#!/usr/bin/env python3
"""
Script to add descriptions to blog posts that don't have them.
Uses OpenAI GPT-5 API to generate descriptions under 140 characters.
"""

import os
import re
import requests
from pathlib import Path

# OpenAI API key. Prefer exporting the OPENAI_API_KEY environment variable so
# the secret never ends up in version control; if it is unset or empty, the
# placeholder below is used and can still be replaced by hand.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or "YOUR_OPENAI_API_KEY"

# Folder that is searched for posts; resolved relative to the current working
# directory, so run the script from the site root.
BLOG_DIR = Path("content/blog")
# Endpoint that receives the description-generation prompt.
API_URL = "https://api.openai.com/v1/responses"


def get_frontmatter_and_content(file_path: Path) -> tuple[str, str, str]:
    """
    Read a markdown file and split it into frontmatter and body.

    Returns a ``(frontmatter, content, full_text)`` tuple. The frontmatter is
    the text between the opening and closing ``---`` lines (delimiters
    excluded). When the file does not start with such a block, the
    frontmatter is an empty string and the content is the whole file.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        raw = handle.read()

    # The frontmatter block must start on the very first line of the file.
    parsed = re.match(r'^---\n(.*?)\n---\n(.*)$', raw, re.DOTALL)
    if parsed is None:
        return "", raw, raw

    header, body = parsed.groups()
    return header, body, raw


def has_description(frontmatter: str) -> bool:
    """
    Tell whether the frontmatter already declares a description.

    Only a line that begins with ``description:`` in the first column counts;
    indented or prefixed keys (e.g. ``meta_description:``) do not.
    """
    return any(
        line.startswith('description:')
        for line in frontmatter.split('\n')
    )


def generate_description(content: str, title: str) -> str:
    """
    Use OpenAI GPT-5 to generate a one-line description capped at 140 characters.

    Args:
        content: Body of the blog post; only the first 2000 characters are sent.
        title: Title of the post, included in the prompt for context.

    Returns:
        The generated description as a single line, or an empty string when
        the request fails or the response holds no usable text.
    """
    # Truncate content to avoid token limits (first 2000 chars should be enough)
    truncated_content = content[:2000]

    prompt = f"""Read the following blog post and create a brief description for it.
The description MUST be under 140 characters.
Do not use quotes around the description.
Just return the description text, nothing else.

Title: {title}

Content:
{truncated_content}"""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }

    payload = {
        "model": "gpt-5",
        "input": prompt
    }

    try:
        # Without a timeout a single stalled connection would hang the whole
        # batch run; a timeout surfaces as a RequestException handled below.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"  API Error: {e}")
        return ""
    except ValueError as e:
        # Body was not valid JSON (raised as a plain ValueError by some
        # versions of requests).
        print(f"  Parse Error: {e}")
        return ""

    description = ""
    try:
        # The response carries a list of output items; use the first
        # "output_text" part of the first "message" item that yields text.
        for item in data.get("output", []):
            if item.get("type") != "message":
                continue
            for part in item.get("content", []):
                if part.get("type") == "output_text":
                    description = part.get("text", "").strip()
                    break
            if description:
                break
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        print(f"  Parse Error: {e}")
        return ""

    # Collapse line breaks and repeated whitespace so the result fits on a
    # single frontmatter line.
    description = " ".join(description.split())

    # Enforce the length cap even if the model ignored the instruction
    if len(description) > 140:
        description = description[:137] + "..."

    return description


def get_title_from_frontmatter(frontmatter: str) -> str:
    """
    Extract the title from frontmatter.

    Looks for the first line starting with ``title:`` and returns its value
    with surrounding single or double quotes removed. Returns ``"Untitled"``
    when there is no such line or its value is empty.
    """
    # Use [ \t]* rather than \s* around the value: \s also matches newlines,
    # which made an empty "title:" line capture the *next* frontmatter line
    # (e.g. "date: ...") as the title.
    match = re.search(
        r'^title:[ \t]*["\']?(.+?)["\']?[ \t]*$', frontmatter, re.MULTILINE
    )
    if match:
        return match.group(1).strip('"\'')
    return "Untitled"


def add_description_to_frontmatter(frontmatter: str, description: str) -> str:
    """
    Add a description field to the frontmatter.

    The field is inserted directly after the first ``title:`` line. When the
    frontmatter has no ``title:`` line, the field is appended at the end so
    the description is never silently dropped.

    Args:
        frontmatter: Frontmatter text without the ``---`` delimiters.
        description: Description text; whitespace runs (including line
            breaks) are collapsed so the value stays on one line.

    Returns:
        The frontmatter text with a ``description: "..."`` line added.
    """
    single_line = " ".join(description.split())
    # Escape backslashes before quotes: inside a YAML double-quoted scalar a
    # backslash starts an escape sequence, so both must be escaped.
    escaped_desc = single_line.replace('\\', '\\\\').replace('"', '\\"')
    description_line = f'description: "{escaped_desc}"'

    lines = frontmatter.split('\n') if frontmatter else []
    for index, line in enumerate(lines):
        if line.startswith('title:'):
            lines.insert(index + 1, description_line)
            break
    else:
        # No title line to anchor on; append instead of losing the field.
        lines.append(description_line)

    return '\n'.join(lines)


def update_file(file_path: Path, frontmatter: str, content: str, new_frontmatter: str):
    """
    Overwrite the file with the new frontmatter followed by the post body.

    The ``frontmatter`` argument (the old frontmatter) is accepted but not
    used; only ``new_frontmatter`` and ``content`` are written.
    """
    pieces = ("---\n", new_frontmatter, "\n---\n", content)
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write("".join(pieces))


def main():
    """
    Add a generated description to every blog post that lacks one.

    Walks ``BLOG_DIR/*/index.md``, skips posts whose frontmatter already has
    a ``description:`` line, asks the API for a description for the rest,
    writes it back into the file, and prints a summary of updated, skipped
    and failed posts.
    """
    # The guard has to match the placeholder actually shipped in
    # OPENAI_API_KEY; the older placeholder text is kept for compatibility.
    placeholder_keys = ("", "YOUR_OPENAI_API_KEY", "your-openai-api-key-here")
    if OPENAI_API_KEY in placeholder_keys:
        print("⚠️  Please set your OpenAI API key in the script!")
        print("   Edit add_descriptions.py and replace 'YOUR_OPENAI_API_KEY'")
        return

    # Find all blog post index.md files
    blog_posts = list(BLOG_DIR.glob("*/index.md"))

    print(f"Found {len(blog_posts)} blog posts")
    print("-" * 50)

    posts_updated = 0
    posts_skipped = 0
    posts_failed = 0

    for post_path in sorted(blog_posts):
        slug = post_path.parent.name
        print(f"\nProcessing: {slug}")

        frontmatter, content, _ = get_frontmatter_and_content(post_path)

        if has_description(frontmatter):
            print("  ✓ Already has description, skipping")
            posts_skipped += 1
            continue

        title = get_title_from_frontmatter(frontmatter)
        print("  → No description found, generating...")

        description = generate_description(content, title)

        if not description:
            print("  ✗ Failed to generate description")
            posts_failed += 1
            continue

        print(f"  → Generated: {description}")

        new_frontmatter = add_description_to_frontmatter(frontmatter, description)

        # Do not rewrite the file or report success unless the description
        # really made it into the frontmatter.
        if not has_description(new_frontmatter):
            print("  ✗ Could not insert description into frontmatter")
            posts_failed += 1
            continue

        update_file(post_path, frontmatter, content, new_frontmatter)

        print("  ✓ Updated successfully")
        posts_updated += 1

    print("\n" + "=" * 50)
    print("Summary:")
    print(f"  Updated: {posts_updated}")
    print(f"  Skipped (already had description): {posts_skipped}")
    print(f"  Failed: {posts_failed}")


# Run the batch update only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

You might need to tweak the script a bit to make it work for you, and also replace YOUR_OPENAI_API_KEY with your OpenAI API key.

Comment via email