# asyncd_filament_database: Obsidian vault -> static HTML site generator

import os
import shutil
import markdown
import re
import json
from pathlib import Path
from PIL import Image
from collections import deque, defaultdict
import subprocess

# Configuration
vault_dir = Path("Obsidian_Vault_001")
output_dir = Path(r"Y:\nginx_site_data") / "asyncd_filament_database"
top_url = "https://mizugane.tcpexposer.com/"
home_url = "https://mizugane.tcpexposer.com/asyncd_filament_database/"
site_title = "asyncd_filament_database"
max_depth = 4
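# max_depth bounds the breadth-first search radius used by collect_nodes_and_links()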


# HTML templates
HEAD = """<!DOCTYPE html>
<html><head><meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<link rel='icon' href='{top_url}img/favicon.ico'>
<link rel='stylesheet' href='{top_url}css/style2.css'>
{additional_css}
<script src="https://unpkg.com/budoux/bundle/budoux-ja.min.js"></script>

<meta property="og:title" content="{site_title}">
<meta property="og:description" content="{title}">
<meta property="og:image" content="{top_url}img/asyncd_filament_umekome.webp">
<meta property="og:url" content="{top_url}">
<meta property="og:type" content="website">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="{site_title}">
</head>
<body>
"""
SITE_TITLE = """
<div class=site_title ><a href="{home_url}"  >{site_title}</a></div>
<hr/>
"""
TITLE = """
<h1>{title}</h1>
""" 
TOP_TITLE = """
<h1>{site_title}</h1>
""" 
FOOT_INDEX = """
<br><hr/><br><div class="footer-nav">
<a href="../">目次へ</a>
<br><br><br><br>
</div>
</body></html>"""

FOOT_TOP = """
<br><hr/><br><div class="footer-nav">
<a href="{top_url}">トップページへ</a>
<br><br><br><br>
</div>
</body></html>"""

assets_dir = output_dir / "assets"

# Markdown converter setup
md = markdown.Markdown(
    extensions=[
        'extra',          # tables, fenced code, and other extras
        'fenced_code',    # explicitly enable ``` fences
        'codehilite',     # syntax highlighting
    ],
    extension_configs={
        'codehilite': {'linenums': False}
    },
    output_format='html5'
)
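# internal_link_map: note name (without .md) -> site-relative page path
# graph: undirected adjacency of [[wiki links]] between notes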
internal_link_map = {}
graph = defaultdict(set)

# ── Cache directory & metadata loading ──
vault_name = vault_dir.name
cache_dir = vault_dir.parent / f"{vault_name}_tmp"
cache_dir.mkdir(parents=True, exist_ok=True)

cache_meta_path = cache_dir / "cache_meta.json"
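# cache_meta maps a vault-relative source path -> {"out_name": ..., "mtime": ...}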
if cache_meta_path.exists():
    with open(cache_meta_path, 'r', encoding='utf-8') as f:
        cache_meta = json.load(f)
else:
    cache_meta = {}

# ── When an original is deleted, drop its cached file and metadata too ──
removed = []
for rel, meta in cache_meta.items():
    orig_path = vault_dir / rel
    if not orig_path.exists():
        tmp_file = cache_dir / meta['out_name']
        if tmp_file.exists():
            tmp_file.unlink()
        removed.append(rel)
for rel in removed:
    cache_meta.pop(rel, None)
if removed:
    with open(cache_meta_path, 'w', encoding='utf-8') as f:
        json.dump(cache_meta, f, indent=2)


def prepare_output():
    # ── Reset output_dir ──
    if output_dir.exists():
        if output_dir.is_file():
            output_dir.unlink()
        else:
            shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # ── Recreate assets_dir (it was removed together with output_dir above) ──
    assets_dir.mkdir(parents=True, exist_ok=True)



def replace_links(md_text):
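    """Rewrite Obsidian-style links in raw Markdown text.

    ![[file]] / ![[file|WxH]] embeds become <img>/<audio>/<video>/<a> tags
    pointing at the converted files under assets/; [[Note]] links become
    anchors resolved through internal_link_map. Returns the rewritten text
    and a list of (css_class, width, height) tuples for per-page sizing CSS.
    """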
    css_classes = []

    def repl_embed(match):
        filename_part = match.group(1)
        parts = filename_part.split('|', 1)
        base_filename = parts[0].strip()
        size_spec = parts[1].strip() if len(parts) > 1 else None

        base_name = os.path.basename(base_filename)
        file_ext = os.path.splitext(base_name)[1].lower()

        audio_ext = ('.wav', '.flac', '.ogg', '.m4a')
        video_ext = ('.mp4', '.mov', '.avi', '.mkv', '.webm')

        if file_ext in audio_ext:
            mp3_name = os.path.splitext(base_name)[0] + '.mp3'
            return f'<audio controls src="{home_url}assets/{mp3_name}"></audio>'

        if file_ext in video_ext:
            webm_name = os.path.splitext(base_name)[0] + '.webm'
            size_attr = ''
            if size_spec and 'x' in size_spec:
                w, h = size_spec.split('x', 1)
                if w.isdigit() and h.isdigit():
                    size_attr = f' width="{w}" height="{h}"'
            return f'<video controls src="{home_url}assets/{webm_name}"{size_attr}></video>'

        if file_ext in ('.png', '.jpg', '.jpeg', '.webp'):
            width = height = None
            if size_spec and 'x' in size_spec:
                w, h = size_spec.split('x', 1)
                try:
                    width, height = int(w), int(h)
                except ValueError:
                    pass
            class_attr = ''
            if width and height:
                cls = f'img-w{width}-h{height}'
                css_classes.append((cls, width, height))
                class_attr = f' class="{cls}"'
            webp_name = os.path.splitext(base_name)[0] + ".webp"
            return f'<img src="{home_url}assets/{webp_name}" alt="{webp_name}"{class_attr}>'
        else:
            # Any other file type becomes a plain download link
            return f'<a href="{home_url}assets/{base_name}">{base_name}</a>'

    def repl_internal(match):
        filename = match.group(1)
        link_path = internal_link_map.get(filename)
        if link_path:
            return f'<a href="{home_url}{link_path}/">{filename}</a>'
        else:
            return filename

    # Replace ![[...]] embeds with <img>/<audio>/<video>/<a> tags
    md_text = re.sub(r'!\[\[(.+?)\]\]', repl_embed, md_text)
    # Replace [[Note]] internal links (but not ![[...]] embeds) with anchors
    md_text = re.sub(r'(?<!\!)\[\[(.+?)\]\]', repl_internal, md_text)

    # De-duplicate the collected CSS classes, preserving order
    seen = set()
    unique = []
    for cls, w, h in css_classes:
        if (cls, w, h) not in seen:
            seen.add((cls, w, h))
            unique.append((cls, w, h))
    return md_text, unique



def create_parent_index(parent_dir, title, link):
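    """Append one link line to the parent directory's index.html.

    Note: during a full build, create_directory_index() later rewrites the
    same file in 'w' mode, so the lines appended here are superseded.
    """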
    if '.obsidian' in str(parent_dir): 
        return
    parent_html_path = output_dir / parent_dir / "index.html"
    parent_html_path.parent.mkdir(parents=True, exist_ok=True)
    with open(parent_html_path, 'a', encoding='utf-8') as f:
        f.write(f'<a href="{home_url}{link}/">{title}</a><br>\n')

def create_directory_index(directory_path, title, links):
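    """Write an index.html for one vault directory, listing its subdirectories and pages."""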
    directory_html_path = output_dir / directory_path / "index.html"
    directory_html_path.parent.mkdir(parents=True, exist_ok=True)
    full_dir_path = vault_dir / directory_path
    subdirs = []
    if full_dir_path.exists():
        subdirs = [item.name for item in full_dir_path.iterdir()
                   if item.is_dir() and item.name != '.obsidian']
    subdir_links_html = ""
    if subdirs:
        subdir_links_html = "<div class='directory-links-box'>\n"
        subdir_links_html += "<b>ディレクトリ</b><br>\n"
        for subdir in sorted(subdirs):
            subdir_url = f"{home_url}{(directory_path / subdir).as_posix()}/"
            subdir_links_html += f"<a href=\"{subdir_url}\">{subdir}</a><br>\n"
        subdir_links_html += "</div>\n"
    links_html = "\n".join([
        f'<a href=\"{home_url}{(directory_path / link).as_posix()}/\">{link}</a><br>'
        for link in links
    ])
    with open(directory_html_path, 'w', encoding='utf-8') as f:
        f.write(
            HEAD.format(title=title, top_url=top_url, site_title=site_title,
                        additional_css="", home_url=home_url)
            + SITE_TITLE.format(home_url=home_url, site_title=site_title)
            + TITLE.format(title=title)
            + subdir_links_html
            + links_html
            + FOOT_INDEX
        )

def collect_nodes_and_links(start_node):
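    """BFS outward from start_node up to max_depth hops; return the nodes in
    scope plus de-duplicated undirected edges between them."""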
    visited = {}
    queue = deque([(start_node, 0)])
    visited[start_node] = 0
    while queue:
        node, depth = queue.popleft()
        if depth >= max_depth:
            continue
        for neighbor in graph.get(node, []):
            if neighbor not in visited or visited[neighbor] > depth + 1:
                visited[neighbor] = depth + 1
                queue.append((neighbor, depth + 1))
    nodes_in_scope = [n for n in visited if visited[n] <= max_depth]
    links_in_scope = []
    seen_edges = set()

    for node in nodes_in_scope:
        for neighbor in graph.get(node, []):
            if neighbor in nodes_in_scope:
                edge = tuple(sorted([node, neighbor]))
                if edge not in seen_edges:
                    seen_edges.add(edge)
                    links_in_scope.append({"source": edge[0], "target": edge[1]})

    return nodes_in_scope, links_in_scope


def process_file(md_path, rel_path):
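    """Convert one Markdown note into <page>/index.html.

    Also emits the page's image-sizing CSS and the data.json consumed by
    the D3 graph view.
    """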
    if '.obsidian' in str(md_path):
        return

    with open(md_path, 'r', encoding='utf-8') as f:
        md_text = f.read()

    md_text, css_classes = replace_links(md_text)
    md.reset()
    html_body = md.convert(md_text)
    html_body = re.sub(
        r'<p>\s*<a href="([^"]+\.mp3)">[^<]+</a>\s*</p>',
        r'<audio controls src="\1"></audio>',
        html_body
    )

    title = rel_path.stem
    parent_dir = rel_path.parent

    if parent_dir != Path('.'):
        create_parent_index(parent_dir, title, rel_path.with_suffix("").as_posix())

    output_page_dir = output_dir / rel_path.with_suffix("")
    output_path = output_page_dir / "index.html"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Generate the per-page image-sizing CSS file
    css_content = ""
    for cls, w, h in css_classes:
        css_content += f".{cls} {{\n  width: {w}px;\n  height: {h}px;\n}}\n"

    additional_css = ''
    if css_content:
        css_path = output_page_dir / "image_styles.css"
        with open(css_path, 'w', encoding='utf-8') as f:
            f.write(css_content)
        additional_css = '<link rel="stylesheet" href="image_styles.css">'

    # Generate graph JSON data (nodes/links within max_depth of this page)
    current_page = title
    nodes_in_scope, links_in_scope = collect_nodes_and_links(current_page)

    nodes_data = []
    for node in nodes_in_scope:
        node_path = internal_link_map.get(node)
        if node_path:
            url = f"{home_url}{node_path}"
            nodes_data.append({"id": node, "path": url})

    json_data = {
        "nodes": nodes_data,
        "links": links_in_scope
    }

    json_path = output_page_dir / "data.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, indent=2)
    # Generate D3 visualization code
    d3_code = f"""
    <!-- SVG and D3.js Integration -->
    <div id="graph-container" 
        class="graph-container"
        data-current="{current_page}"
        data-path="data.json">
        <svg ></svg>
    </div>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <script src="{top_url}js/graph.js" type="module"></script>
    """
    head_content = HEAD.format(
        title=title,
        top_url=top_url,
        site_title=site_title,
        additional_css=additional_css,
        home_url=home_url
    )

    site_title_content = SITE_TITLE.format(home_url=home_url, site_title=site_title)

    title_content = TITLE.format(title=title)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(
            head_content 
            + site_title_content 
            + title_content 
            + "<budoux-ja>\n"
            + html_body 
            + "\n</budoux-ja>"
            + d3_code 
            + FOOT_INDEX
            )


def save_cache_meta():
    with open(cache_meta_path, 'w', encoding='utf-8') as f:
        json.dump(cache_meta, f, indent=2)

def copy_asset_with_cache(file_path: Path):
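    """Convert or copy one non-Markdown asset into assets_dir, with caching.

    Images are downscaled to max 1024px WEBP, audio is re-encoded to 64k MP3,
    video to VP9/Opus WEBM via ffmpeg; anything else is copied verbatim.
    Converted files are cached in cache_dir, keyed by source mtime.
    """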
    rel = str(file_path.relative_to(vault_dir))
    mtime = file_path.stat().st_mtime
    ext = file_path.suffix.lower()
    # Choose the output name based on the extension
    if ext in ('.png','.jpg','.jpeg','.webp'):
        out_name = file_path.stem + ".webp"
    elif ext in ('.wav','.flac','.ogg','.m4a'):
        out_name = file_path.stem + ".mp3"
    elif ext in ('.mp4','.mov','.avi','.mkv','.webm'):
        out_name = file_path.stem + ".webm"
    else:
        out_name = file_path.name

    # Cache hit: reuse the previously converted file
    meta = cache_meta.get(rel)
    if meta and meta['mtime'] == mtime and (cache_dir / meta['out_name']).exists():
        shutil.copy(cache_dir / meta['out_name'], assets_dir / out_name)
        return

    # Cache miss: convert (or copy) into the cache directory first
    target_cache = cache_dir / out_name
    if ext in ('.png', '.jpg', '.jpeg', '.webp'):
        with Image.open(file_path) as img:
            img.thumbnail((1024, 1024), Image.LANCZOS)
            img.save(target_cache, 'WEBP', quality=85)
    elif ext in ('.mp4','.mov','.avi','.mkv','.webm'):
        subprocess.run([
            'ffmpeg','-y','-i',str(file_path),
            '-c:v','libvpx-vp9','-b:v','0','-crf','30',
            '-c:a','libopus', str(target_cache)
        ], check=True)
    elif ext in ('.wav','.flac','.ogg','.m4a'):
        subprocess.run([
            'ffmpeg','-y','-i',str(file_path),
            '-codec:a','libmp3lame','-b:a','64k', str(target_cache)
        ], check=True)
    else:
        shutil.copy(file_path, target_cache)

    # Copy into assets_dir and record the fresh cache entry
    shutil.copy(target_cache, assets_dir / out_name)
    cache_meta[rel] = {"out_name": out_name, "mtime": mtime}
    save_cache_meta()

def create_top_index(all_md_paths):
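    """Build the site root index.html: a directory-link box followed by an
    indented tree of every page in the vault."""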
    tree = {}
    for rel_path in all_md_paths:
        parts = Path(rel_path).parts
        current = tree
        for part in parts[:-1]:
            current = current.setdefault(part, {})
        current.setdefault('_files', []).append(parts[-1])
    node_path = []  # path-component stack shared with the recursive renderer

    def render_tree(node, depth=0):
        html = ""
        indent = "&nbsp;" * 4 * depth
        for file in sorted(node.get('_files', [])):
            file_path = '/'.join(node_path + [file])
            html += f"{indent}<a href=\"{home_url}{file_path}/\">{file}</a><br>\n"
        for dir_name in sorted(k for k in node.keys() if k != '_files'):
            node_path.append(dir_name)
            if '.obsidian' not in dir_name:
                html += f"{indent}<b>{dir_name}/</b><br>\n"
                html += render_tree(node[dir_name], depth + 1)
            node_path.pop()
        return html

    body_html = render_tree(tree)
    parent_dir_links_html = "<div class='directory-links-box'>\n"
    parent_dir_links_html += "<b>ディレクトリ</b><br>\n"
    subdirs = sorted([d.name for d in Path(vault_dir).iterdir() if d.is_dir() and '.obsidian' not in d.name])
    for subdir in subdirs:
        subdir_url = f"{home_url}{subdir}/"
        parent_dir_links_html += f"<a href=\"{subdir_url}\">{subdir}</a><br>\n"
    parent_dir_links_html += "</div>\n"
    output_path = output_dir / "index.html"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(
            HEAD.format(title=site_title, top_url=top_url, site_title=site_title,
                        additional_css="", home_url=home_url)
            + TOP_TITLE.format(site_title=site_title)
            + parent_dir_links_html
            + body_html
            + FOOT_TOP.format(top_url=top_url)
        )


def build_site():
    global internal_link_map, graph
    # Build internal_link_map
    for root, dirs, files in os.walk(vault_dir):
        if '.obsidian' in dirs:
            dirs.remove('.obsidian')
        for file in files:
            if file.lower().endswith(".md"):
                path = Path(root) / file
                rel_path = path.relative_to(vault_dir)
                internal_link_map[file[:-3]] = rel_path.with_suffix("").as_posix()
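    # Second pass: the membership check below needs the completed
    # internal_link_map, so edges cannot be built during the first walk.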
    # Build graph
    graph = defaultdict(set)
    for root, dirs, files in os.walk(vault_dir):
        if '.obsidian' in dirs:
            dirs.remove('.obsidian')
        for file in files:
            if file.lower().endswith(".md"):
                path = Path(root) / file
                rel_path = path.relative_to(vault_dir)
                current_page = rel_path.stem
                with open(path, 'r', encoding='utf-8') as f:
                    md_text = f.read()
                links = re.findall(r'(?<!\!)\[\[(.+?)\]\]', md_text)
                for link in links:
                    linked_page = link.strip()
                    if linked_page in internal_link_map:
                        graph[current_page].add(linked_page)
                        graph[linked_page].add(current_page)
    # Process files
    directory_links = {}
    all_md_links = []
    for root, dirs, files in os.walk(vault_dir):
        if '.obsidian' in dirs:
            dirs.remove('.obsidian')
        for file in files:
            path = Path(root) / file
            rel_path = path.relative_to(vault_dir)
            if file.lower().endswith(".md"):
                process_file(path, rel_path)
                parent_dir = rel_path.parent
                directory_links.setdefault(parent_dir, []).append(rel_path.stem)
                all_md_links.append(rel_path.with_suffix("").as_posix())
            else:
                copy_asset_with_cache(path)
    # Create directory indexes
    for directory, links in directory_links.items():
        create_directory_index(directory, directory.name, links)
    # Create top index
    create_top_index(all_md_links)

if __name__ == "__main__":
    prepare_output()
    build_site()
    print("Build complete.")