HM网站复制器软件+源码

HM网站复制器软件+源码

📅 2025年07月12日 👀 64 浏览 python 软件 精品软件 源代码

图片


"""
HM网站复制器 v1.0.0
Copyright © 2024 Anzai. All rights reserved.
"""

import os
import sys
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import mimetypes
import re
import threading
from datetime import datetime
import customtkinter as ctk
from tkinter import filedialog, messagebox
import webbrowser
from playwright.sync_api import sync_playwright, TimeoutError
import time
import json

# Configure the global customtkinter look: dark appearance mode, blue colour theme.
ctk.set_appearance_mode("dark")
ctk.set_default_color_theme("blue")

class WebpageDownloader:
    def __init__(self, url, save_dir='downloaded_webpage', progress_callback=None):
        """Prepare the state needed to copy a single page.

        Args:
            url: Address of the page to copy.
            save_dir: Directory that receives the HTML and its resources.
            progress_callback: Optional callable that is handed one status
                string per progress event; ``None`` disables reporting.
        """
        parsed = urlparse(url)

        self.url = url
        self.save_dir = save_dir
        # Scheme plus host of the page, e.g. "https://example.com".
        self.base_url = f'{parsed.scheme}://{parsed.netloc}'

        # Maps a resource URL to the local relative path it was saved under.
        self.resources = {}

        # One shared HTTP session. Certificate verification is switched off
        # and the urllib3 warnings this triggers are silenced.
        http = requests.Session()
        http.verify = False
        self.session = http
        requests.packages.urllib3.disable_warnings()

        self.progress_callback = progress_callback

        # Empty buckets; preload_with_playwright() replaces this dict.
        self.recorded_behaviors = {
            bucket: []
            for bucket in ('animations', 'popups', 'interactions', 'scripts')
        }

    def preload_with_playwright(self):
        """Render the page in headless Chromium and return the resulting HTML.

        The page is opened with a 30 second default timeout. Waiting for the
        network to become idle and recording the behaviour snapshot
        (``self.recorded_behaviors``) are both best-effort: if either fails,
        the HTML that has already been rendered is still returned instead of
        being discarded.

        Returns:
            The serialized page content as a string, or ``None`` when the
            browser could not be used or the navigation itself timed out.
        """
        def notify(message):
            if self.progress_callback:
                self.progress_callback(message)

        try:
            notify("正在使用Playwright加载页面...")

            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True, args=['--ignore-certificate-errors'])
                context = browser.new_context(ignore_https_errors=True)
                page = context.new_page()

                try:
                    # 30 second default timeout for every page operation.
                    page.set_default_timeout(30000)

                    try:
                        page.goto(self.url)
                    except TimeoutError:
                        notify("页面加载超时")
                        return None

                    # Pages that keep polling never become idle; the document
                    # itself has loaded at this point, so keep going.
                    try:
                        page.wait_for_load_state('networkidle')
                    except TimeoutError:
                        notify("等待网络空闲超时,继续使用已加载的内容")

                    # Give late dynamic content one more second to settle.
                    time.sleep(1)

                    content = page.content()

                    # The snapshot is auxiliary data; a failure here must not
                    # cost us the rendered HTML.
                    try:
                        self.recorded_behaviors = {
                            'animations': page.evaluate('() => { return Array.from(document.getElementsByTagName("*")).filter(el => window.getComputedStyle(el).animation !== "none").map(el => el.outerHTML) }'),
                            'popups': page.evaluate('() => { return Array.from(document.querySelectorAll("[class*=popup], [class*=modal], [id*=popup], [id*=modal]")).map(el => el.outerHTML) }'),
                            'interactions': page.evaluate('() => { return Array.from(document.querySelectorAll("[onclick], [onmouseover], [onmouseout], [onchange]")).map(el => ({ element: el.outerHTML, events: { onclick: el.onclick, onmouseover: el.onmouseover, onmouseout: el.onmouseout, onchange: el.onchange } })) }'),
                            'scripts': page.evaluate('() => { return Array.from(document.getElementsByTagName("script")).map(s => s.outerHTML) }')
                        }
                    except Exception as e:
                        notify(f"记录页面行为失败: {str(e)}")

                    notify("页面预加载完成")
                    return content

                finally:
                    context.close()
                    browser.close()

        except Exception as e:
            notify(f"Playwright预加载失败: {str(e)}")
            return None

    def download_resources(self, html_content):
        """Localize every resource the page references and write index.html.

        The HTML is parsed with BeautifulSoup. Stylesheets, icons, scripts,
        images (``src``/``srcset`` plus common lazy-loading attributes) and
        ``url(...)`` references in inline styles are fetched through
        :meth:`download_resource`, and the markup is rewritten to point at the
        local copies. ``url(...)`` references inside the downloaded
        stylesheets are localized too. The result is written to
        ``<save_dir>/index.html``.

        Errors are reported through ``progress_callback``; nothing is raised.

        Args:
            html_content: HTML source of the page as a string.
        """
        try:
            soup = BeautifulSoup(html_content, 'html.parser')

            self._inject_nav_script(soup)
            stylesheets = self._localize_link_tags(soup)
            self._localize_script_tags(soup)
            self._localize_image_tags(soup)
            self._localize_lazy_image_attrs(soup)
            self._localize_inline_styles(soup)
            for source_url, local_path in stylesheets:
                self._localize_stylesheet_urls(source_url, local_path)

            if self.save_dir:
                os.makedirs(self.save_dir, exist_ok=True)
            html_path = os.path.join(self.save_dir, 'index.html')
            with open(html_path, 'w', encoding='utf-8') as f:
                f.write(str(soup))

            self._report(f"HTML文件已保存: {html_path}")

        except Exception as e:
            self._report(f"下载资源时出错: {str(e)}")

    def _report(self, message):
        """Forward a status message to the progress callback, if one is set."""
        if self.progress_callback:
            self.progress_callback(message)

    def _inject_nav_script(self, soup):
        """Add the smooth-scroll navigation helper script to the document."""
        nav_script = soup.new_tag('script')
        # Raw string: the CSS selector below needs literal backslashes.
        # `li.px-[16px]` is not a valid selector (querySelectorAll throws), so
        # the brackets are escaped. preventDefault() is only called for the
        # entries the script actually handles, so other links keep working.
        nav_script.string = r'''
                document.addEventListener('DOMContentLoaded', function() {
                    function scrollToSection(text) {
                        // 根据文本内容查找对应的区域
                        const allElements = document.querySelectorAll('*');
                        let targetSection = null;

                        for (const element of allElements) {
                            if (element.textContent.trim() === text) {
                                // 向上查找最近的section或div容器
                                let parent = element;
                                while (parent && !targetSection) {
                                    if (parent.tagName === 'SECTION' || 
                                        (parent.tagName === 'DIV' && parent.className.includes('section'))) {
                                        targetSection = parent;
                                        break;
                                    }
                                    parent = parent.parentElement;
                                }
                                if (targetSection) break;
                            }
                        }

                        if (targetSection) {
                            const offset = 100; // 偏移量,避免导航栏遮挡
                            const targetPosition = targetSection.offsetTop - offset;

                            window.scrollTo({
                                top: targetPosition,
                                behavior: 'smooth'
                            });

                            // 添加高亮效果
                            targetSection.style.backgroundColor = 'rgba(11, 195, 107, 0.1)';
                            setTimeout(() => {
                                targetSection.style.backgroundColor = '';
                            }, 1000);
                        }
                    }

                    // 处理所有可能的导航元素
                    const navItems = document.querySelectorAll('[class*="menuItem"], [class*="menu-item"], .cursor-pointer, li.px-\\[16px\\], .nav-item, span[role="button"], a[href^="#"]');

                    navItems.forEach(function(element) {
                        element.style.cursor = 'pointer';
                        element.addEventListener('click', function(e) {
                            const text = this.textContent.trim();
                            if (text === '品牌简介' || text === '优势特点' || text === '支持地区') {
                                e.preventDefault();
                                scrollToSection(text);
                            }
                        });
                    });
                });
            '''

        head = soup.find('head')
        if head is not None:
            head.append(nav_script)
        else:
            soup.insert(0, nav_script)

    def _source_url_of(self, local_path, fallback_ref):
        """Return the URL a local file was downloaded from."""
        for source_url, saved_path in self.resources.items():
            if saved_path == local_path:
                return source_url
        return urljoin(self.url, fallback_ref)

    def _localize_link_tags(self, soup):
        """Download stylesheets and icons referenced by <link> tags.

        Returns:
            A list of ``(source_url, local_path)`` pairs, one per stylesheet
            that was downloaded.
        """
        stylesheets = []
        for link in soup.find_all('link'):
            href = link.get('href')
            rel = link.get('rel')
            if not href or not rel:
                continue

            # html.parser yields `rel` as a list of tokens; accept a plain
            # string as well.
            tokens = rel if isinstance(rel, list) else str(rel).split()
            tokens = {token.lower() for token in tokens}
            is_stylesheet = 'stylesheet' in tokens
            if not is_stylesheet and not tokens & {'icon', 'apple-touch-icon'}:
                continue

            # Icons are images: typing them as 'css' would append ".css".
            new_path = self.download_resource(href, 'css' if is_stylesheet else 'img')
            if not new_path:
                continue

            link['href'] = new_path
            self._report(f"已下载: {new_path}")
            if is_stylesheet:
                stylesheets.append((self._source_url_of(new_path, href), new_path))
        return stylesheets

    def _localize_script_tags(self, soup):
        """Download external scripts and point their src at the local copy."""
        for js in soup.find_all('script', src=True):
            new_path = self.download_resource(js['src'], 'js')
            if new_path:
                js['src'] = new_path
                self._report(f"已下载: {new_path}")

    def _rewrite_srcset(self, srcset):
        """Return srcset with each downloadable candidate localized.

        Candidates that cannot be downloaded are left out; an empty string
        means nothing could be localized.
        """
        candidates = []
        for item in srcset.split(','):
            parts = item.strip().split()
            if not parts or parts[0].startswith(('data:', '#', 'javascript:')):
                continue
            new_path = self.download_resource(parts[0], 'img')
            if not new_path:
                continue
            # Keep the width/density descriptor when there is one.
            candidates.append(f"{new_path} {parts[1]}" if len(parts) > 1 else new_path)
        return ', '.join(candidates)

    def _localize_image_tags(self, soup):
        """Localize src and srcset of <img>, <source> and <image> tags."""
        for tag in soup.find_all(['img', 'source', 'image']):
            src = tag.get('src')
            if src and not src.startswith(('data:', '#', 'javascript:')):
                new_path = self.download_resource(src, 'img')
                if new_path:
                    tag['src'] = new_path
                    self._report(f"已下载: {new_path}")

            # srcset is handled once; running it again over already
            # localized paths would try to download those paths.
            if tag.has_attr('srcset'):
                rewritten = self._rewrite_srcset(tag['srcset'])
                if rewritten:
                    tag['srcset'] = rewritten

    def _localize_lazy_image_attrs(self, soup):
        """Localize lazy-loading image attributes such as data-src."""
        img_attrs = ['data-src', 'data-original', 'data-bg', 'data-background']
        for elem in soup.find_all():
            for attr in img_attrs:
                if not elem.has_attr(attr):
                    continue
                src = elem[attr]
                if src.startswith(('data:', '#', 'javascript:')):
                    continue
                new_path = self.download_resource(src, 'img')
                if new_path:
                    elem[attr] = new_path
                    self._report(f"已下载: {new_path}")

    def _localize_inline_styles(self, soup):
        """Localize url(...) references inside style attributes."""
        url_pattern = re.compile(r'url\(\s*([\'"]?)(.*?)\1\s*\)')

        def localize(match):
            target = match.group(2).strip()
            if not target or target.startswith('data:'):
                return match.group(0)
            new_path = self.download_resource(target, 'style')
            if not new_path:
                return match.group(0)
            quote = match.group(1)
            # An unquoted url() cannot hold spaces, quotes or parentheses.
            if not quote and re.search(r'[\s()\'"]', new_path):
                quote = '"'
            return f'url({quote}{new_path}{quote})'

        for tag in soup.find_all(style=True):
            tag['style'] = url_pattern.sub(localize, tag['style'])

    def _localize_stylesheet_urls(self, source_url, local_path):
        """Localize url(...) references inside one downloaded stylesheet.

        The stylesheet is read from its saved copy rather than fetched again.
        Relative references are resolved against ``source_url`` (the URL the
        stylesheet came from) and rewritten relative to the stylesheet's own
        location inside the save directory.
        """
        import posixpath

        try:
            css_file = os.path.join(self.save_dir, local_path)
            with open(css_file, 'rb') as f:
                raw = f.read()

            # Try UTF-8 first; fall back to latin-1, which round-trips every
            # byte unchanged so non-UTF-8 files are not corrupted.
            encoding = 'utf-8'
            try:
                css_text = raw.decode(encoding)
            except UnicodeDecodeError:
                encoding = 'latin-1'
                css_text = raw.decode(encoding)

            css_dir = posixpath.dirname(local_path) or '.'
            url_pattern = re.compile(r'url\(\s*([\'"]?)(.*?)\1\s*\)')

            def localize(match):
                target = match.group(2).strip()
                if not target or target.startswith(('data:', '#')):
                    return match.group(0)

                absolute, _, fragment = urljoin(source_url, target).partition('#')
                # Drop the query string (cache busters, "?#iefix" hacks).
                absolute = absolute.split('?')[0]

                new_path = self.download_resource(absolute, 'fonts')
                if not new_path:
                    return match.group(0)
                self._report(f"已下载: {new_path}")

                # The browser resolves the reference relative to the CSS file.
                local_ref = posixpath.relpath(new_path, css_dir)
                if fragment:
                    local_ref = f'{local_ref}#{fragment}'
                return f'url("{local_ref}")'

            updated = url_pattern.sub(localize, css_text)
            if updated != css_text:
                with open(css_file, 'wb') as f:
                    f.write(updated.encode(encoding))
        except Exception as e:
            self._report(f"处理CSS文件时出错: {str(e)}")

    def download_resource(self, url, resource_type):
        """下载单个资源"""
        # 清理URL
        url = self.clean_url(url)
        if not url:
            return None

        # 移除URL中的查询字符串用于缓存键
        cache_url = url.split('?')[0] if '?' in url else url

        try:
            # 处理相对URL
            absolute_url = url if url.startswith(('http://', 'https://')) else urljoin(self.base_url, url)

            # 如果已经下载过,直接返回缓存的路径
            cache_key = urljoin(self.base_url, cache_url)
            if cache_key in self.resources:
                return self.resources[cache_key]

            # 忽略特定域名的资源
            ignored_domains = [
                'hm.baidu.com',
                'www.googletagmanager.com',
                'www.google-analytics.com',
                'connect.facebook.net',
                'platform.twitter.com',
                'assets.salesmartly.com',
            ]

            parsed_url = urlparse(absolute_url)
            if parsed_url.netloc in ignored_domains:
                return None

            # 下载资源
            response = self.session.get(absolute_url, timeout=10)
            if response.status_code != 200:
                return None

            # 获取相对路径
            relative_path = self.get_relative_path(url)
            if not relative_path or relative_path.endswith('/'):
                content_type = response.headers.get('content-type', '')
                ext = mimetypes.guess_extension(content_type.split(';')[0].strip())

                # 处理字体文件的扩展名
                if resource_type == 'fonts':
                    if 'woff2' in content_type:
                        ext = '.woff2'
                    elif 'woff' in content_type:
                        ext = '.woff'
                    elif 'ttf' in content_type or 'truetype' in content_type:
                        ext = '.ttf'
                    elif 'otf' in content_type or 'opentype' in content_type:
                        ext = '.otf'
                    elif not ext:
                        # 从URL中猜测扩展名
                        url_ext = os.path.splitext(url)[1].lower()
                        if url_ext in ['.woff2', '.woff', '.ttf', '.otf']:
                            ext = url_ext
                        else:
                            ext = '.woff'  # 默认使用.woff

                filename = f"{resource_type}_{len(self.resources)}{ext if ext else '.txt'}"
                relative_path = os.path.join(resource_type, filename)

            # 确保文件扩展名正确
            if resource_type == 'js' and not relative_path.endswith('.js'):
                relative_path += '.js'
            elif resource_type == 'css' and not relative_path.endswith('.css'):
                relative_path += '.css'

            # 创建保存路径
            save_path = os.path.join(self.save_dir, relative_path)
            save_dir = os.path.dirname(save_path)
            os.makedirs(save_dir, exist_ok=True)

            # 保存文件
            with open(save_path, 'wb') as f:
                f.write(response.content)

            # 统一使用正斜杠的相对路径
            relative_path = relative_path.replace('\\', '/')

            # 保存到资源集合中
            cache_key = urljoin(self.base_url, cache_url)
            self.resources[cache_key] = relative_path

            return relative_path

        except Exception as e:
            if self.progress_callback:
                self.progress_callback(f"下载资源时出错 {url}: {str(e)}")
            return None

    def create_directory(self):
        """创建保存文件的目录"""
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)

    def get_relative_path(self, url):
        """获取资源相对于网站根目录的路径"""
        try:
            # 解码URL
            from urllib.parse import unquote
            url = unquote(url)

            if url.startswith(('http://', 'https://')):
                parsed_url = urlparse(url)
                # 如果是同一域名下的资源
                if parsed_url.netloc == urlparse(self.base_url).netloc:
                    path = parsed_url.path.lstrip('/')
                # 如果是外部资源,保存在external目录下
                else:
                    path = parsed_url.path.lstrip('/')
                    if not path:
                        path = 'index.html'
                    path = os.path.join('external', parsed_url.netloc, path)
            else:
                # 处理相对路径
                path = url.lstrip('/')

            # 移除查询参数中的特殊字符
            if '?' in path:
                base_path = path.split('?')[0]
                query = path.split('?')[1]
                # 将查询参数转换为安全的文件名
                safe_query = re.sub(r'[^\w\-\.]', '_', query)
                # 限制查询参数长度
                if len(safe_query) > 50:
                    safe_query = safe_query[:50]
                path = f"{base_path}_{safe_query}"

            # 确保路径中没有无效字符
            path = re.sub(r'[<>:"|?*]', '_', path)
            # 限制文件名长度
            if len(os.path.basename(path)) > 100:
                name, ext = os.path.splitext(os.path.basename(path))
                path = os.path.join(os.path.dirname(path), name[:95] + ext)

            return path

        except Exception as e:
            if self.progress_callback:
                self.progress_callback(f"处理路径失败: {str(e)}")
            # 生成一个随机文件名作为后备方案
            import uuid
            return f"resource_{uuid.uuid4().hex[:8]}"

        # 处理查询参数
        if '?' in path:
            base_path = path.split('?')[0]
            query = path.split('?')[1]
            # 将查询参数作为文件名的一部分
            dir_name = os.path.dirname(base_path)
            file_name = os.path.basename(base_path)
            name, ext = os.path.splitext(file_name)
            # 将查询参数转换为文件名的一部分
            safe_query = re.sub(r'[=&]', '-', query)
            safe_query = re.sub(r'[^\w\-\.]', '_', safe_query)
            new_name = f"{name}-{safe_query}{ext}"
            path = os.path.join(dir_name, new_name) if dir_name else new_name

        return path

    def clean_url(self, url):
        """清理URL,移除JavaScript变量和无效字符"""
        if not url:
            return None

        # 忽略数据URL和锚点
        if url.startswith(('data:', '#', 'javascript:', 'about:', 'mailto:', 'tel:')):
            return None

        # 解码URL中的百分号编码
        try:
            from urllib.parse import unquote
            url = unquote(url)
        except Exception as e:
            if self.progress_callback:
                self.progress_callback(f"URL解码失败: {str(e)}")
            return None

        # 忽略包含JavaScript变量的URL
        if any(x in url for x in [
            '{{', '}}', '+imageBaseUrl+', '"+imageBaseUrl+"',
            'replaceImg3X', '${', '}', 'ratio'
        ]):
            return None

        # 移除URL中的引号
        url = url.strip('"\'')

        # 忽略无效的URL
        if url.startswith(('javascript:', 'about:', 'mailto:', 'tel:')):
            return None

        return url

    def download(self):
        """开始下载网页及其资源"""
        try:
            # 创建保存目录
            self.create_directory()

            if self.progress_callback:
                self.progress_callback("开始下载主页面...")

            # 使用Playwright预加载网页
            html_content = self.preload_with_playwright()
            if not html_content:
                if self.progress_callback:
                    self.progress_callback("预加载失败,使用普通方式下载...")
                # 如果预加载失败,使用普通方式获取
                response = self.session.get(self.url, timeout=10)
                if response.status_code != 200:
                    raise Exception(f"无法访问网页,状态码: {response.status_code}")
                response.encoding = response.apparent_encoding
                html_content = response.text

            if self.progress_callback:
                self.progress_callback("正在处理HTML内容...")

            # 下载资源并处理HTML内容
            self.download_resources(html_content)

            # 获取HTML文件的相对路径
            html_path = self.get_relative_path(self.url)
            if not html_path or html_path.endswith('/'):
                html_path = 'index.html'

            # 保存HTML
            html_save_path = os.path.join(self.save_dir, html_path)
            os.makedirs(os.path.dirname(html_save_path), exist_ok=True)

            if self.progress_callback:
                self.progress_callback(f"网页下载完成!保存在目录: {self.save_dir}")
                self.progress_callback(f"共下载了 {len(self.resources)} 个资源文件")

        except Exception as e:
            if self.progress_callback:
                self.progress_callback(f"下载过程中出错: {str(e)}")

class ModernGUI(ctk.CTk):
    def __init__(self):
        """Create the main window and build its three sections."""
        super().__init__()

        # Window title and initial size.
        self.title("HM网站复制器 官网:www.hmjisu.com")
        self.geometry("800x600")

        # Optional window icon. The icon is applied from a deferred callback,
        # so the error handling has to live inside that callback: a
        # try/except around self.after() would not see failures raised later.
        icon_path = os.path.abspath('app.ico')
        if os.path.exists(icon_path):
            def apply_icon():
                try:
                    self.tk.call('wm', 'iconbitmap', self._w, icon_path)
                except Exception as e:
                    print(f"设置图标出错: {e}")

            self.after(100, apply_icon)

        # Row 1 (the log card) absorbs extra vertical space.
        self.grid_rowconfigure(1, weight=1)
        self.grid_columnconfigure(0, weight=1)

        # Top card: URL, save location and action buttons.
        self.create_input_card()

        # Middle card: progress log.
        self.create_log_card()

        # Bottom row: status bar.
        self.create_status_bar()

        # Suggest a save directory while the URL is being typed.
        self.url_entry.bind('<KeyRelease>', self.update_save_dir)

    def create_input_card(self):
        """Build the top card: URL field, save-location row and action buttons."""
        label_font = ("Helvetica", 12)

        card = ctk.CTkFrame(self)
        card.grid(row=0, column=0, padx=20, pady=20, sticky="ew")
        card.grid_columnconfigure(0, weight=1)

        # --- page address -------------------------------------------------
        url_row = ctk.CTkFrame(card, fg_color="transparent")
        url_row.grid(row=0, column=0, padx=20, pady=(20, 10), sticky="ew")
        url_row.grid_columnconfigure(0, weight=1)

        url_caption = ctk.CTkLabel(url_row, text="网页地址", font=label_font)
        url_caption.grid(row=0, column=0, sticky="w", pady=(0, 5))

        self.url_entry = ctk.CTkEntry(
            url_row,
            placeholder_text="输入网页地址 (https://...)",
            height=35,
        )
        self.url_entry.grid(row=1, column=0, sticky="ew")

        # --- save location --------------------------------------------------
        dir_row = ctk.CTkFrame(card, fg_color="transparent")
        dir_row.grid(row=1, column=0, padx=20, pady=10, sticky="ew")
        dir_row.grid_columnconfigure(0, weight=1)

        dir_caption = ctk.CTkLabel(dir_row, text="保存位置", font=label_font)
        dir_caption.grid(row=0, column=0, sticky="w", pady=(0, 5))

        # Entry and "browse" button share one line.
        picker = ctk.CTkFrame(dir_row, fg_color="transparent")
        picker.grid(row=1, column=0, sticky="ew")
        picker.grid_columnconfigure(0, weight=1)

        self.dir_entry = ctk.CTkEntry(
            picker,
            placeholder_text="选择保存位置",
            height=35,
        )
        self.dir_entry.grid(row=0, column=0, sticky="ew", padx=(0, 10))

        browse = ctk.CTkButton(
            picker,
            text="浏览",
            width=100,
            height=35,
            command=self.browse_directory,
        )
        browse.grid(row=0, column=1)

        # --- action buttons (two equally wide columns) ----------------------
        actions = ctk.CTkFrame(card, fg_color="transparent")
        actions.grid(row=2, column=0, padx=20, pady=20, sticky="ew")
        actions.grid_columnconfigure((0, 1), weight=1)

        self.preview_btn = ctk.CTkButton(
            actions,
            text="预览网页",
            height=40,
            command=self.preview_webpage,
        )
        self.preview_btn.grid(row=0, column=0, padx=(0, 10), sticky="ew")

        self.download_btn = ctk.CTkButton(
            actions,
            text="开始下载",
            height=40,
            command=self.start_download,
        )
        self.download_btn.grid(row=0, column=1, padx=(10, 0), sticky="ew")

    def update_save_dir(self, event=None):
        """Suggest a save directory named after the host of the typed URL.

        The host is taken from ``urlparse(...).hostname``, which leaves out
        credentials and the port and copes with IPv6 literals. A leading
        ``www.`` is dropped and characters that are invalid in directory
        names are replaced by ``_``. The field is left untouched while the
        URL has no usable host.

        Args:
            event: Tk event object when used as a key binding; unused.
        """
        url = self.url_entry.get().strip()
        if not url.startswith(('http://', 'https://')):
            return

        try:
            domain = urlparse(url).hostname or ''
        except ValueError:
            # Half-typed URLs (e.g. an unfinished IPv6 literal) are expected.
            return

        if domain.startswith('www.'):
            domain = domain[4:]
        domain = re.sub(r'[<>:"/\\|?*]', '_', domain)
        if not domain:
            return

        self.dir_entry.delete(0, "end")
        self.dir_entry.insert(0, domain)

    def create_log_card(self):
        """Build the middle card: a heading and the scrolling log textbox."""
        card = ctk.CTkFrame(self)
        card.grid(row=1, column=0, padx=20, pady=(0, 20), sticky="nsew")
        # The textbox row stretches; the heading row keeps its natural height.
        card.grid_rowconfigure(1, weight=1)
        card.grid_columnconfigure(0, weight=1)

        heading = ctk.CTkLabel(card, text="下载进度", font=("Helvetica", 14, "bold"))
        heading.grid(row=0, column=0, padx=20, pady=15, sticky="w")

        log_box = ctk.CTkTextbox(card, wrap="word", font=("Consolas", 12))
        log_box.grid(row=1, column=0, padx=20, pady=(0, 20), sticky="nsew")
        self.log_text = log_box

    def create_status_bar(self):
        """Build the bottom row: copyright, status text and the website link."""
        bar = ctk.CTkFrame(self, fg_color="transparent")
        bar.grid(row=2, column=0, padx=20, pady=(0, 10), sticky="ew")
        # The middle column takes up the spare width.
        bar.grid_columnconfigure(1, weight=1)

        # Left: copyright notice.
        notice = ctk.CTkLabel(bar, text="© 2024 Anzai", height=25, anchor="w")
        notice.grid(row=0, column=0, sticky="w")

        # Middle: latest status message, updated by update_log().
        self.status_label = ctk.CTkLabel(bar, text="准备就绪", height=25, anchor="w")
        self.status_label.grid(row=0, column=1, padx=20, sticky="w")

        # Right: clickable link, coloured like a hyperlink (light, dark).
        link = ctk.CTkLabel(
            bar,
            text="访问官网: www.hmjisu.com",
            height=25,
            anchor="e",
            cursor="hand2",
            text_color=("#1a0dab", "#4e8cff"),
        )
        link.grid(row=0, column=2, sticky="e")
        link.bind("<Button-1>", lambda e: webbrowser.open("https://www.hmjisu.com"))

        # Underline the link while the pointer is over it.
        def set_link_font(underlined):
            if underlined:
                link.configure(font=("Helvetica", 12, "underline"))
            else:
                link.configure(font=("Helvetica", 12))

        link.bind("<Enter>", lambda e: set_link_font(True))
        link.bind("<Leave>", lambda e: set_link_font(False))

    def browse_directory(self):
        """Let the user pick a folder and put it into the save-location field."""
        chosen = filedialog.askdirectory()
        # An empty result means the dialog was cancelled: keep the old value.
        if not chosen:
            return
        self.dir_entry.delete(0, "end")
        self.dir_entry.insert(0, chosen)

    def update_log(self, message):
        """Append a timestamped line to the log and mirror it in the status bar.

        This method is used as the downloader's progress callback and is
        therefore called from the worker thread. Widgets are only touched on
        the main thread: a call from any other thread re-schedules itself
        through the Tk event loop and returns immediately.

        Args:
            message: Text to display.
        """
        if threading.current_thread() is not threading.main_thread():
            self.after(0, self.update_log, message)
            return

        timestamp = datetime.now().strftime('%H:%M:%S')
        self.log_text.insert("end", f"[{timestamp}] {message}\n")
        self.log_text.see("end")
        self.status_label.configure(text=message)

    def preview_webpage(self):
        """Open the downloaded ``index.html`` in the default web browser.

        Shows an error dialog when no save location is entered or when that
        location contains no ``index.html`` yet.
        """
        from pathlib import Path

        save_dir = self.dir_entry.get().strip()

        if not save_dir:
            messagebox.showerror("错误", "请先选择或输入保存位置")
            return

        index_path = os.path.join(save_dir, 'index.html')

        if not os.path.exists(index_path):
            messagebox.showerror("错误", "未找到已下载的网页,请先下载网页")
            return

        try:
            # as_uri() builds a well-formed file:// URL on every platform and
            # percent-encodes spaces and non-ASCII characters.
            file_url = Path(index_path).resolve().as_uri()
            webbrowser.open(file_url)
        except Exception as e:
            messagebox.showerror("错误", f"无法打开网页:{str(e)}")

    def start_download(self):
        """Validate the inputs and run the download in a background thread.

        The URL must start with ``http://`` or ``https://`` and a save
        location must be given; otherwise an error dialog is shown and
        nothing is started. While the download runs the button is disabled;
        it is re-enabled when the worker finishes, whether it succeeded or
        failed.
        """
        url = self.url_entry.get().strip()
        save_dir = self.dir_entry.get().strip()

        if not url:
            messagebox.showerror("错误", "请输入网页URL")
            return

        if not url.startswith(('http://', 'https://')):
            messagebox.showerror("错误", "请输入有效的URL(以http://或https://开头)")
            return

        # An empty path cannot be created; ask for one up front, with the
        # same message preview_webpage() uses.
        if not save_dir:
            messagebox.showerror("错误", "请先选择或输入保存位置")
            return

        self.download_btn.configure(state="disabled")
        self.log_text.delete("0.0", "end")
        self.update_log("开始下载...")

        def report(message):
            # Called on the worker thread: hand the widget update to the
            # Tk event loop instead of touching widgets here.
            self.after(0, self.update_log, message)

        def download_thread():
            try:
                downloader = WebpageDownloader(url, save_dir, report)
                downloader.download()
            except Exception as e:
                report(f"下载过程中出错: {str(e)}")
            finally:
                self.after(0, lambda: self.download_btn.configure(state="normal"))

        thread = threading.Thread(target=download_thread)
        thread.daemon = True
        thread.start()

# Start the GUI only when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    app = ModernGUI()
    app.mainloop()
发布于 2025年07月12日 更新于 2025年08月26日
← 返回首页

评论 (0)

暂无评论,快来发表第一条评论吧!