# asyncd_filament_database: Obsidian vault -> static HTML site generator

import os
import shutil
import markdown
import re
import json
from pathlib import Path
from PIL import Image
from collections import deque, defaultdict
import subprocess

# Configuration
vault_dir = Path("Obsidian_Vault_001")
output_dir = Path(r"Y:\nginx_site_data") / "asyncd_filament_database"
top_url = "https://mizugane.tcpexposer.com/"
home_url = "https://mizugane.tcpexposer.com/asyncd_filament_database/"
site_title = "asyncd_filament_database"
max_depth = 4
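# max_depth bounds the breadth-first search radius used by collect_nodes_and_links()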


# HTML templates
HEAD = """<!DOCTYPE html>
<html><head><meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<link rel='icon' href='{top_url}img/favicon.ico'>
<link rel='stylesheet' href='{top_url}css/style2.css'>
{additional_css}
<script src="https://unpkg.com/budoux/bundle/budoux-ja.min.js"></script>

<meta property="og:title" content="{site_title}">
<meta property="og:description" content="{title}">
<meta property="og:image" content="{top_url}img/asyncd_filament_umekome.webp">
<meta property="og:url" content="{top_url}">
<meta property="og:type" content="website">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="{site_title}">
</head>
<body>
"""
SITE_TITLE = """
<div class=site_title ><a href="{home_url}"  >{site_title}</a></div>
<hr/>
"""
TITLE = """
<h1>{title}</h1>
""" 
TOP_TITLE = """
<h1>{site_title}</h1>
""" 
FOOT_INDEX = """
<br><hr/><br><div class="footer-nav">
<a href="../">目次へ</a>
<br><br><br><br>
</div>
</body></html>"""

FOOT_TOP = """
<br><hr/><br><div class="footer-nav">
<a href="{top_url}">トップページへ</a>
<br><br><br><br>
</div>
</body></html>"""

assets_dir = output_dir / "assets"

# Markdown converter setup
md = markdown.Markdown(
    extensions=[
        'extra',          # tables, fenced code, and other extras
        'fenced_code',    # explicitly enable ``` fences
        'codehilite',     # syntax highlighting
    ],
    extension_configs={
        'codehilite': {'linenums': False}
    },
    output_format='html5'
)
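# internal_link_map: note name (without .md) -> site-relative page path
# graph: undirected adjacency of [[wiki links]] between notes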
internal_link_map = {}
graph = defaultdict(set)

# ── Cache directory & metadata loading ──
vault_name = vault_dir.name
cache_dir = vault_dir.parent / f"{vault_name}_tmp"
cache_dir.mkdir(parents=True, exist_ok=True)

cache_meta_path = cache_dir / "cache_meta.json"
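# cache_meta maps a vault-relative source path -> {"out_name": ..., "mtime": ...}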
if cache_meta_path.exists():
    with open(cache_meta_path, 'r', encoding='utf-8') as f:
        cache_meta = json.load(f)
else:
    cache_meta = {}

# ── When an original is deleted, drop its cached file and metadata too ──
removed = []
for rel, meta in cache_meta.items():
    orig_path = vault_dir / rel
    if not orig_path.exists():
        tmp_file = cache_dir / meta['out_name']
        if tmp_file.exists():
            tmp_file.unlink()
        removed.append(rel)
for rel in removed:
    cache_meta.pop(rel, None)
if removed:
    with open(cache_meta_path, 'w', encoding='utf-8') as f:
        json.dump(cache_meta, f, indent=2)


def prepare_output():
    # ── Reset output_dir ──
    if output_dir.exists():
        if output_dir.is_file():
            output_dir.unlink()
        else:
            shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # ── Recreate assets_dir (it was removed together with output_dir above) ──
    assets_dir.mkdir(parents=True, exist_ok=True)



def replace_links(md_text):
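    """Rewrite Obsidian-style links in raw Markdown text.

    ![[file]] / ![[file|WxH]] embeds become <img>/<audio>/<video>/<a> tags
    pointing at the converted files under assets/; [[Note]] links become
    anchors resolved through internal_link_map. Returns the rewritten text
    and a list of (css_class, width, height) tuples for per-page sizing CSS.
    """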
    css_classes = []

    def repl_embed(match):
        filename_part = match.group(1)
        parts = filename_part.split('|', 1)
        base_filename = parts[0].strip()
        size_spec = parts[1].strip() if len(parts) > 1 else None

        base_name = os.path.basename(base_filename)
        file_ext = os.path.splitext(base_name)[1].lower()

        audio_ext = ('.wav', '.flac', '.ogg', '.m4a')
        video_ext = ('.mp4', '.mov', '.avi', '.mkv', '.webm')

        if file_ext in audio_ext:
            mp3_name = os.path.splitext(base_name)[0] + '.mp3'
            return f'<audio controls src="{home_url}assets/{mp3_name}"></audio>'

        if file_ext in video_ext:
            webm_name = os.path.splitext(base_name)[0] + '.webm'
            size_attr = ''
            if size_spec and 'x' in size_spec:
                w, h = size_spec.split('x', 1)
                if w.isdigit() and h.isdigit():
                    size_attr = f' width="{w}" height="{h}"'
            return f'<video controls src="{home_url}assets/{webm_name}"{size_attr}></video>'

        if file_ext in ('.png', '.jpg', '.jpeg', '.webp'):
            width = height = None
            if size_spec and 'x' in size_spec:
                w, h = size_spec.split('x', 1)
                try:
                    width, height = int(w), int(h)
                except ValueError:
                    pass
            class_attr = ''
            if width and height:
                cls = f'img-w{width}-h{height}'
                css_classes.append((cls, width, height))
                class_attr = f' class="{cls}"'
            webp_name = os.path.splitext(base_name)[0] + ".webp"
            return f'<img src="{home_url}assets/{webp_name}" alt="{webp_name}"{class_attr}>'
        else:
            # Any other file type becomes a plain download link
            return f'<a href="{home_url}assets/{base_name}">{base_name}</a>'

    def repl_internal(match):
        filename = match.group(1)
        link_path = internal_link_map.get(filename)
        if link_path:
            return f'<a href="{home_url}{link_path}/">{filename}</a>'
        else:
            return filename

    # Replace ![[...]] embeds with <img>/<audio>/<video>/<a> tags
    md_text = re.sub(r'!\[\[(.+?)\]\]', repl_embed, md_text)
    # Replace [[Note]] internal links (but not ![[...]] embeds) with anchors
    md_text = re.sub(r'(?<!\!)\[\[(.+?)\]\]', repl_internal, md_text)

    # De-duplicate the collected CSS classes, preserving order
    seen = set()
    unique = []
    for cls, w, h in css_classes:
        if (cls, w, h) not in seen:
            seen.add((cls, w, h))
            unique.append((cls, w, h))
    return md_text, unique



def create_parent_index(parent_dir, title, link):
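    """Append one link line to the parent directory's index.html.

    Note: during a full build, create_directory_index() later rewrites the
    same file in 'w' mode, so the lines appended here are superseded.
    """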
    if '.obsidian' in str(parent_dir): 
        return
    parent_html_path = output_dir / parent_dir / "index.html"
    parent_html_path.parent.mkdir(parents=True, exist_ok=True)
    with open(parent_html_path, 'a', encoding='utf-8') as f:
        f.write(f'<a href="{home_url}{link}/">{title}</a><br>\n')

def create_directory_index(directory_path, title, links):
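    """Write an index.html for one vault directory, listing its subdirectories and pages."""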
    directory_html_path = output_dir / directory_path / "index.html"
    directory_html_path.parent.mkdir(parents=True, exist_ok=True)
    full_dir_path = vault_dir / directory_path
    subdirs = []
    if full_dir_path.exists():
        subdirs = [item.name for item in full_dir_path.iterdir()
                   if item.is_dir() and item.name != '.obsidian']
    subdir_links_html = ""
    if subdirs:
        subdir_links_html = "<div class='directory-links-box'>\n"
        subdir_links_html += "<b>ディレクトリ</b><br>\n"
        for subdir in sorted(subdirs):
            subdir_url = f"{home_url}{(directory_path / subdir).as_posix()}/"
            subdir_links_html += f"<a href=\"{subdir_url}\">{subdir}</a><br>\n"
        subdir_links_html += "</div>\n"
    links_html = "\n".join([
        f'<a href=\"{home_url}{(directory_path / link).as_posix()}/\">{link}</a><br>'
        for link in links
    ])
    with open(directory_html_path, 'w', encoding='utf-8') as f:
        f.write(
            HEAD.format(title=title, top_url=top_url, site_title=site_title,
                        additional_css="", home_url=home_url)
            + SITE_TITLE.format(home_url=home_url, site_title=site_title)
            + TITLE.format(title=title)
            + subdir_links_html
            + links_html
            + FOOT_INDEX
        )

def collect_nodes_and_links(start_node):
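    """BFS outward from start_node up to max_depth hops; return the nodes in
    scope plus de-duplicated undirected edges between them."""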
    visited = {}
    queue = deque([(start_node, 0)])
    visited[start_node] = 0
    while queue:
        node, depth = queue.popleft()
        if depth >= max_depth:
            continue
        for neighbor in graph.get(node, []):
            if neighbor not in visited or visited[neighbor] > depth + 1:
                visited[neighbor] = depth + 1
                queue.append((neighbor, depth + 1))
    nodes_in_scope = [n for n in visited if visited[n] <= max_depth]
    links_in_scope = []
    seen_edges = set()

    for node in nodes_in_scope:
        for neighbor in graph.get(node, []):
            if neighbor in nodes_in_scope:
                edge = tuple(sorted([node, neighbor]))
                if edge not in seen_edges:
                    seen_edges.add(edge)
                    links_in_scope.append({"source": edge[0], "target": edge[1]})

    return nodes_in_scope, links_in_scope


def process_file(md_path, rel_path):
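    """Convert one Markdown note into <page>/index.html.

    Also emits the page's image-sizing CSS and the data.json consumed by
    the D3 graph view.
    """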
    if '.obsidian' in str(md_path):
        return

    with open(md_path, 'r', encoding='utf-8') as f:
        md_text = f.read()

    md_text, css_classes = replace_links(md_text)
    md.reset()
    html_body = md.convert(md_text)
    html_body = re.sub(
        r'<p>\s*<a href="([^"]+\.mp3)">[^<]+</a>\s*</p>',
        r'<audio controls src="\1"></audio>',
        html_body
    )

    title = rel_path.stem
    parent_dir = rel_path.parent

    if parent_dir != Path('.'):
        create_parent_index(parent_dir, title, rel_path.with_suffix("").as_posix())

    output_page_dir = output_dir / rel_path.with_suffix("")
    output_path = output_page_dir / "index.html"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Generate the per-page image-sizing CSS file
    css_content = ""
    for cls, w, h in css_classes:
        css_content += f".{cls} {{\n  width: {w}px;\n  height: {h}px;\n}}\n"

    additional_css = ''
    if css_content:
        css_path = output_page_dir / "image_styles.css"
        with open(css_path, 'w', encoding='utf-8') as f:
            f.write(css_content)
        additional_css = '<link rel="stylesheet" href="image_styles.css">'

    # Generate graph JSON data (nodes/links within max_depth of this page)
    current_page = title
    nodes_in_scope, links_in_scope = collect_nodes_and_links(current_page)

    nodes_data = []
    for node in nodes_in_scope:
        node_path = internal_link_map.get(node)
        if node_path:
            url = f"{home_url}{node_path}"
            nodes_data.append({"id": node, "path": url})

    json_data = {
        "nodes": nodes_data,
        "links": links_in_scope
    }

    json_path = output_page_dir / "data.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, indent=2)
    # Generate D3 visualization code
    d3_code = f"""
    <!-- SVG and D3.js Integration -->
    <div id="graph-container" 
        class="graph-container"
        data-current="{current_page}"
        data-path="data.json">
        <svg ></svg>
    </div>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <script src="{top_url}js/graph.js" type="module"></script>
    """
    head_content = HEAD.format(
        title=title,
        top_url=top_url,
        site_title=site_title,
        additional_css=additional_css,
        home_url=home_url
    )

    site_title_content = SITE_TITLE.format(home_url=home_url, site_title=site_title)

    title_content = TITLE.format(title=title)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(
            head_content 
            + site_title_content 
            + title_content 
            + "<budoux-ja>\n"
            + html_body 
            + "\n</budoux-ja>"
            + d3_code 
            + FOOT_INDEX
            )


def save_cache_meta():
    with open(cache_meta_path, 'w', encoding='utf-8') as f:
        json.dump(cache_meta, f, indent=2)

def copy_asset_with_cache(file_path: Path):
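    """Convert or copy one non-Markdown asset into assets_dir, with caching.

    Images are downscaled to max 1024px WEBP, audio is re-encoded to 64k MP3,
    video to VP9/Opus WEBM via ffmpeg; anything else is copied verbatim.
    Converted files are cached in cache_dir, keyed by source mtime.
    """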
    rel = str(file_path.relative_to(vault_dir))
    mtime = file_path.stat().st_mtime
    ext = file_path.suffix.lower()
    # Choose the output name based on the extension
    if ext in ('.png','.jpg','.jpeg','.webp'):
        out_name = file_path.stem + ".webp"
    elif ext in ('.wav','.flac','.ogg','.m4a'):
        out_name = file_path.stem + ".mp3"
    elif ext in ('.mp4','.mov','.avi','.mkv','.webm'):
        out_name = file_path.stem + ".webm"
    else:
        out_name = file_path.name

    # Cache hit: reuse the previously converted file
    meta = cache_meta.get(rel)
    if meta and meta['mtime'] == mtime and (cache_dir / meta['out_name']).exists():
        shutil.copy(cache_dir / meta['out_name'], assets_dir / out_name)
        return

    # Cache miss: convert (or copy) into the cache directory first
    target_cache = cache_dir / out_name
    if ext in ('.png', '.jpg', '.jpeg', '.webp'):
        with Image.open(file_path) as img:
            img.thumbnail((1024, 1024), Image.LANCZOS)
            img.save(target_cache, 'WEBP', quality=85)
    elif ext in ('.mp4','.mov','.avi','.mkv','.webm'):
        subprocess.run([
            'ffmpeg','-y','-i',str(file_path),
            '-c:v','libvpx-vp9','-b:v','0','-crf','30',
            '-c:a','libopus', str(target_cache)
        ], check=True)
    elif ext in ('.wav','.flac','.ogg','.m4a'):
        subprocess.run([
            'ffmpeg','-y','-i',str(file_path),
            '-codec:a','libmp3lame','-b:a','64k', str(target_cache)
        ], check=True)
    else:
        shutil.copy(file_path, target_cache)

    # Copy into assets_dir and record the fresh cache entry
    shutil.copy(target_cache, assets_dir / out_name)
    cache_meta[rel] = {"out_name": out_name, "mtime": mtime}
    save_cache_meta()

def create_top_index(all_md_paths):
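    """Build the site root index.html: a directory-link box followed by an
    indented tree of every page in the vault."""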
    tree = {}
    for rel_path in all_md_paths:
        parts = Path(rel_path).parts
        current = tree
        for part in parts[:-1]:
            current = current.setdefault(part, {})
        current.setdefault('_files', []).append(parts[-1])
    node_path = []  # path-component stack shared with the recursive renderer

    def render_tree(node, depth=0):
        html = ""
        indent = "&nbsp;" * 4 * depth
        for file in sorted(node.get('_files', [])):
            file_path = '/'.join(node_path + [file])
            html += f"{indent}<a href=\"{home_url}{file_path}/\">{file}</a><br>\n"
        for dir_name in sorted(k for k in node.keys() if k != '_files'):
            node_path.append(dir_name)
            if '.obsidian' not in dir_name:
                html += f"{indent}<b>{dir_name}/</b><br>\n"
                html += render_tree(node[dir_name], depth + 1)
            node_path.pop()
        return html

    body_html = render_tree(tree)
    parent_dir_links_html = "<div class='directory-links-box'>\n"
    parent_dir_links_html += "<b>ディレクトリ</b><br>\n"
    subdirs = sorted([d.name for d in Path(vault_dir).iterdir() if d.is_dir() and '.obsidian' not in d.name])
    for subdir in subdirs:
        subdir_url = f"{home_url}{subdir}/"
        parent_dir_links_html += f"<a href=\"{subdir_url}\">{subdir}</a><br>\n"
    parent_dir_links_html += "</div>\n"
    output_path = output_dir / "index.html"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(
            HEAD.format(title=site_title, top_url=top_url, site_title=site_title,
                        additional_css="", home_url=home_url)
            + TOP_TITLE.format(site_title=site_title)
            + parent_dir_links_html
            + body_html
            + FOOT_TOP.format(top_url=top_url)
        )


def build_site():
    global internal_link_map, graph
    # Build internal_link_map
    for root, dirs, files in os.walk(vault_dir):
        if '.obsidian' in dirs:
            dirs.remove('.obsidian')
        for file in files:
            if file.lower().endswith(".md"):
                path = Path(root) / file
                rel_path = path.relative_to(vault_dir)
                internal_link_map[file[:-3]] = rel_path.with_suffix("").as_posix()
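    # Second pass: the membership check below needs the completed
    # internal_link_map, so edges cannot be built during the first walk.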
    # Build graph
    graph = defaultdict(set)
    for root, dirs, files in os.walk(vault_dir):
        if '.obsidian' in dirs:
            dirs.remove('.obsidian')
        for file in files:
            if file.lower().endswith(".md"):
                path = Path(root) / file
                rel_path = path.relative_to(vault_dir)
                current_page = rel_path.stem
                with open(path, 'r', encoding='utf-8') as f:
                    md_text = f.read()
                links = re.findall(r'(?<!\!)\[\[(.+?)\]\]', md_text)
                for link in links:
                    linked_page = link.strip()
                    if linked_page in internal_link_map:
                        graph[current_page].add(linked_page)
                        graph[linked_page].add(current_page)
    # Process files
    directory_links = {}
    all_md_links = []
    for root, dirs, files in os.walk(vault_dir):
        if '.obsidian' in dirs:
            dirs.remove('.obsidian')
        for file in files:
            path = Path(root) / file
            rel_path = path.relative_to(vault_dir)
            if file.lower().endswith(".md"):
                process_file(path, rel_path)
                parent_dir = rel_path.parent
                directory_links.setdefault(parent_dir, []).append(rel_path.stem)
                all_md_links.append(rel_path.with_suffix("").as_posix())
            else:
                copy_asset_with_cache(path)
    # Create directory indexes
    for directory, links in directory_links.items():
        create_directory_index(directory, directory.name, links)
    # Create top index
    create_top_index(all_md_links)

if __name__ == "__main__":
    prepare_output()
    build_site()
    print("Build complete.")