feat(variables):重构变量服务为 Redis 实现并增强作用域支持

- 将变量服务从 HTTP 接口迁移至 Redis 存储,提升性能与可靠性
- 支持显式作用域前缀:site:xxx 和 global:xxx
- 实现变量 TTL 与一次性读取功能(var_ttl、var_single_use)
- 新增 VariableScope 枚举与 VariableTarget 缓存键设计
- 改进 VariableResolver 缓存机制以兼容作用域隔离
- 更新 README 文档说明新变量语法与使用示例
- 移除 settings 中已弃用的 variable_service_url 配置项
- 调整 ActionRegistry 自动注册逻辑以适配模块化扫描
- 统一浏览器选择器模式分隔符由 '=' 改为 ':'
- 优化浏览器元素等待与属性设置的容错处理逻辑
This commit is contained in:
2025-10-20 21:47:58 +08:00
parent 952c90e537
commit f8370eb85e
11 changed files with 734 additions and 104 deletions

198
schema/xspider.xsd Normal file
View File

@@ -0,0 +1,198 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
<xs:annotation>
<xs:documentation>
xspider XML 模板 Schema,用于在编辑器中提示必填项、枚举值与作用域规则。
适用于描述站点配置、流程、动作与抽取字段。
</xs:documentation>
</xs:annotation>
<!-- Basic types -->
<xs:simpleType name="BooleanFlag">
  <xs:annotation>
    <xs:documentation xml:lang="zh">
      支持 true/false, 1/0, yes/no, on/off。
    </xs:documentation>
    <xs:documentation xml:lang="en">
      Boolean switch written as one of the literal pairs true/false, 1/0,
      yes/no or on/off. Only the exact spellings enumerated here validate.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <!-- Literals are listed pairwise: the "on" spelling, then its "off" spelling. -->
    <xs:enumeration value="true"/>
    <xs:enumeration value="false"/>
    <xs:enumeration value="1"/>
    <xs:enumeration value="0"/>
    <xs:enumeration value="yes"/>
    <xs:enumeration value="no"/>
    <xs:enumeration value="on"/>
    <xs:enumeration value="off"/>
  </xs:restriction>
</xs:simpleType>
<xs:simpleType name="SelectorMode">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      Selector language accepted by the mode attributes in this schema:
      css or xpath.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <xs:enumeration value="css"/>
    <xs:enumeration value="xpath"/>
  </xs:restriction>
</xs:simpleType>
<xs:simpleType name="UniqueKeysMode">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      Values allowed for the unique_keys attribute of a flow: all, custom
      or null. What each value means is decided by the consuming
      application; this schema only restricts the spelling.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <xs:enumeration value="all"/>
    <xs:enumeration value="custom"/>
    <!-- "null" here is the literal four-letter string, not an absent value. -->
    <xs:enumeration value="null"/>
  </xs:restriction>
</xs:simpleType>
<xs:simpleType name="NonNegativeInt">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      Alias of xs:nonNegativeInteger (integers greater than or equal to 0)
      with no additional facets.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:nonNegativeInteger">
    <!-- No facets: the value space is exactly that of the base type. -->
  </xs:restriction>
</xs:simpleType>
<!-- Header -->
<xs:complexType name="HeaderType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">HTTP 请求头设置</xs:documentation>
    <xs:documentation xml:lang="en">
      HTTP request header setting: a required name and an optional value.
      The element has no child content.
    </xs:documentation>
  </xs:annotation>
  <xs:attribute name="name" type="xs:string" use="required"/>
  <!-- "optional" is the default attribute use, so it is not spelled out. -->
  <xs:attribute name="value" type="xs:string"/>
</xs:complexType>
<!-- Config -->
<xs:complexType name="ConfigType">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      Site configuration: any number of header children, plus the
      enable_proxy, rotate_ua and retry attributes. When omitted, the
      attributes default to false, false and 3 respectively.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="header" type="HeaderType" minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="enable_proxy" type="BooleanFlag" default="false"/>
  <xs:attribute name="rotate_ua" type="BooleanFlag" default="false"/>
  <!-- Typed with the schema's NonNegativeInt alias, matching timeout_ms and
       after_wait on ActionType; the accepted values are unchanged. -->
  <xs:attribute name="retry" type="NonNegativeInt" default="3"/>
</xs:complexType>
<!-- Action -->
<xs:complexType name="ActionType" mixed="true">
  <xs:annotation>
    <xs:documentation xml:lang="zh">
      动作执行配置。内置类型包含:goto、click、type、wait_dom_show、
      wait_dom_gone、wait_dom_hide、wait_time、run_js、set_header、set_attr、
      set_var、captcha。可按需扩展自定义类型。
    </xs:documentation>
    <xs:documentation xml:lang="en">
      Action step configuration. Built-in values of the type attribute
      include goto, click, type, wait_dom_show, wait_dom_gone,
      wait_dom_hide, wait_time, run_js, set_header, set_attr, set_var and
      captcha; custom types may be added, so type is left as a plain
      string. The element may carry text content and any additional
      attributes.
    </xs:documentation>
  </xs:annotation>
  <!-- No child elements are declared; text is allowed because the type is mixed. -->
  <xs:sequence/>
  <xs:attribute name="type" type="xs:string" use="required"/>
  <xs:attribute name="selector" type="xs:string"/>
  <xs:attribute name="mode" type="SelectorMode" default="xpath"/>
  <xs:attribute name="timeout_ms" type="NonNegativeInt"/>
  <xs:attribute name="after_wait" type="NonNegativeInt" default="0"/>
  <!-- Attributes not declared above are accepted and validated only if a
       declaration for them can be found (lax). -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<!-- Field -->
<xs:complexType name="FieldType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">字段抽取规则,selector 可为 CSS 或 XPath。</xs:documentation>
    <xs:documentation xml:lang="en">
      Field extraction rule. name and selector are required; mode states
      whether selector is CSS or XPath and defaults to css.
    </xs:documentation>
  </xs:annotation>
  <xs:attribute name="name" type="xs:string" use="required"/>
  <xs:attribute name="selector" type="xs:string" use="required"/>
  <!-- Note the default differs from ActionType and PaginateType, which use xpath. -->
  <xs:attribute name="mode" type="SelectorMode" default="css"/>
  <xs:attribute name="value_type" type="xs:string"/>
</xs:complexType>
<!-- Download -->
<xs:complexType name="DownloadType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">下载附件配置,将所有属性传入下载器。</xs:documentation>
    <xs:documentation xml:lang="en">
      Attachment download configuration. No attributes are declared by
      name; every attribute is accepted and, per the note above, handed to
      the downloader.
    </xs:documentation>
  </xs:annotation>
  <!-- Empty content: no child elements are declared. -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<!-- Extract -->
<xs:complexType name="ExtractType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">
      表格/列表抽取配置。record_css 或 record_xpath 至少填写一个。
    </xs:documentation>
    <xs:documentation xml:lang="en">
      Table/list extraction configuration. At least one of record_css or
      record_xpath should be given; both are declared optional, so this
      rule is not enforced by the schema itself.
    </xs:documentation>
  </xs:annotation>
  <!-- Order matters: all field elements come first, then at most one download. -->
  <xs:sequence>
    <xs:element name="field" type="FieldType" minOccurs="0" maxOccurs="unbounded"/>
    <xs:element name="download" type="DownloadType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="record_css" type="xs:string"/>
  <xs:attribute name="record_xpath" type="xs:string"/>
</xs:complexType>
<!-- Excel Extract -->
<xs:complexType name="ExcelExtractType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">Excel 文件抽取配置,file_pattern 与 pattern 二选一。</xs:documentation>
    <xs:documentation xml:lang="en">
      Excel file extraction configuration. Give either file_pattern or
      pattern; both are declared optional, so the either/or rule is not
      enforced by the schema itself.
    </xs:documentation>
  </xs:annotation>
  <!-- Empty content: no child elements are declared. -->
  <xs:attribute name="file_pattern" type="xs:string"/>
  <xs:attribute name="pattern" type="xs:string"/>
  <xs:attribute name="directory" type="xs:string"/>
</xs:complexType>
<!-- Pagination -->
<xs:complexType name="PaginateType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">分页配置,可指定 XPath 或 CSS 选择器。</xs:documentation>
    <xs:documentation xml:lang="en">
      Pagination configuration; an XPath or CSS selector may be given.
      All attributes are optional and mode defaults to xpath.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence/>
  <xs:attribute name="selector" type="xs:string"/>
  <xs:attribute name="css" type="xs:string"/>
  <xs:attribute name="mode" type="SelectorMode" default="xpath"/>
  <!-- Typed with the schema's NonNegativeInt alias, matching timeout_ms and
       after_wait on ActionType; the accepted values are unchanged. -->
  <xs:attribute name="max_pages" type="NonNegativeInt"/>
</xs:complexType>
<!-- Flow -->
<xs:complexType name="FlowType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">
      业务流程。建议至少配置 extract 或 excel_extract。
    </xs:documentation>
    <xs:documentation xml:lang="en">
      Business flow. Configuring at least one of extract or excel_extract
      is recommended; the schema leaves every child optional.
    </xs:documentation>
  </xs:annotation>
  <!-- Children must appear in this order: actions, extract, excel_extract, paginate. -->
  <xs:sequence>
    <xs:element name="action" type="ActionType" minOccurs="0" maxOccurs="unbounded"/>
    <xs:element name="extract" type="ExtractType" minOccurs="0"/>
    <xs:element name="excel_extract" type="ExcelExtractType" minOccurs="0"/>
    <xs:element name="paginate" type="PaginateType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string"/>
  <xs:attribute name="entry" type="xs:string"/>
  <xs:attribute name="url" type="xs:string"/>
  <xs:attribute name="data_type" type="xs:string"/>
  <xs:attribute name="unique_keys" type="UniqueKeysMode" default="all"/>
  <xs:attribute name="columns" type="xs:string"/>
  <!-- Attributes not declared above are accepted and validated only if a
       declaration for them can be found (lax). -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<xs:complexType name="FlowsType">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      Container for the flow elements of a site; at least one flow is
      required.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <!-- minOccurs is left at its default of 1. -->
    <xs:element name="flow" type="FlowType" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
<xs:complexType name="LoginType">
  <xs:annotation>
    <xs:documentation xml:lang="zh">登录流程,结构与 FlowType 相同但允许缺少抽取步骤。</xs:documentation>
    <xs:documentation xml:lang="en">
      Login flow. Same structure as FlowType; extraction steps may be
      omitted.
    </xs:documentation>
  </xs:annotation>
  <xs:complexContent>
    <xs:extension base="FlowType">
      <!-- Nothing is added: children and attributes are exactly those of FlowType. -->
    </xs:extension>
  </xs:complexContent>
</xs:complexType>
<!-- Root -->
<xs:complexType name="SiteType">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      Type of the site element. id is required; base and any further
      attributes are optional. The children config, login and flows are
      each optional.
    </xs:documentation>
  </xs:annotation>
  <!-- Children that are present must appear in this order: config, login, flows. -->
  <xs:sequence>
    <xs:element name="config" type="ConfigType" minOccurs="0"/>
    <xs:element name="login" type="LoginType" minOccurs="0"/>
    <xs:element name="flows" type="FlowsType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string" use="required"/>
  <xs:attribute name="base" type="xs:string"/>
  <!-- Attributes not declared above are accepted and validated only if a
       declaration for them can be found (lax). -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<xs:element name="site" type="SiteType">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      The only global element declared by this schema, and therefore the
      document root of a template.
    </xs:documentation>
  </xs:annotation>
</xs:element>
</xs:schema>