feat(variables):重构变量服务为 Redis 实现并增强作用域支持
- 将变量服务从 HTTP 接口迁移至 Redis 存储,提升性能与可靠性
- 支持显式作用域前缀:site:xxx 和 global:xxx
- 实现变量 TTL 与一次性读取功能(var_ttl、var_single_use)
- 新增 VariableScope 枚举与 VariableTarget 缓存键设计
- 改进 VariableResolver 缓存机制以兼容作用域隔离
- 更新 README 文档说明新变量语法与使用示例
- 移除 settings 中已弃用的 variable_service_url 配置项
- 调整 ActionRegistry 自动注册逻辑以适配模块化扫描
- 统一浏览器选择器模式分隔符由 '=' 改为 ':'
- 优化浏览器元素等待与属性设置的容错处理逻辑
This commit is contained in:
198
schema/xspider.xsd
Normal file
198
schema/xspider.xsd
Normal file
@@ -0,0 +1,198 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           elementFormDefault="qualified"
           attributeFormDefault="unqualified">

<xs:annotation>
  <xs:documentation>
    Schema for xspider XML templates. It lets editors hint required items,
    enumerated values and scope rules.
    It is meant for describing site configuration, flows, actions and
    extraction fields.
  </xs:documentation>
</xs:annotation>

<!-- Basic types -->
<xs:simpleType name="BooleanFlag">
  <xs:annotation>
    <xs:documentation>
      Accepts true/false, 1/0, yes/no, on/off.
    </xs:documentation>
  </xs:annotation>
  <!-- Enumerated on xs:string, so only these exact lowercase spellings validate. -->
  <xs:restriction base="xs:string">
    <xs:enumeration value="true"/>
    <xs:enumeration value="false"/>
    <xs:enumeration value="1"/>
    <xs:enumeration value="0"/>
    <xs:enumeration value="yes"/>
    <xs:enumeration value="no"/>
    <xs:enumeration value="on"/>
    <xs:enumeration value="off"/>
  </xs:restriction>
</xs:simpleType>

<xs:simpleType name="SelectorMode">
  <xs:annotation>
    <xs:documentation>
      Selector syntax used by "mode" attributes: either css or xpath.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <xs:enumeration value="css"/>
    <xs:enumeration value="xpath"/>
  </xs:restriction>
</xs:simpleType>

<xs:simpleType name="UniqueKeysMode">
  <xs:annotation>
    <xs:documentation>
      Allowed values for the unique_keys attribute: all, custom or null.
    </xs:documentation>
  </xs:annotation>
  <!-- "null" below is a literal string value of the enumeration, not xsi:nil.
       NOTE(review): what each mode means at runtime is not visible in this schema. -->
  <xs:restriction base="xs:string">
    <xs:enumeration value="all"/>
    <xs:enumeration value="custom"/>
    <xs:enumeration value="null"/>
  </xs:restriction>
</xs:simpleType>

<xs:simpleType name="NonNegativeInt">
  <xs:annotation>
    <xs:documentation>
      Named alias of xs:nonNegativeInteger (no additional facets).
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:nonNegativeInteger"/>
</xs:simpleType>

<!-- Header -->
<xs:complexType name="HeaderType">
  <xs:annotation>
    <xs:documentation>HTTP request header setting.</xs:documentation>
  </xs:annotation>
  <!-- The header name is mandatory; the value may be omitted. -->
  <xs:attribute name="name" type="xs:string" use="required"/>
  <xs:attribute name="value" type="xs:string" use="optional"/>
</xs:complexType>

<!-- Config -->
<xs:complexType name="ConfigType">
  <xs:annotation>
    <xs:documentation>
      Configuration element: any number of header children plus the
      enable_proxy, rotate_ua and retry attributes.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="header" type="HeaderType" minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="enable_proxy" type="BooleanFlag" default="false"/>
  <xs:attribute name="rotate_ua" type="BooleanFlag" default="false"/>
  <!-- Uses the schema's own NonNegativeInt alias, like the other integer
       attributes; the accepted value space is unchanged. -->
  <xs:attribute name="retry" type="NonNegativeInt" default="3"/>
</xs:complexType>

<!-- Action -->
<xs:complexType name="ActionType" mixed="true">
  <xs:annotation>
    <xs:documentation>
      Action execution configuration. Built-in types include: goto, click,
      type, wait_dom_show, wait_dom_gone, wait_dom_hide, wait_time, run_js,
      set_header, set_attr, set_var, captcha. Custom types can be added as
      needed.
    </xs:documentation>
  </xs:annotation>
  <!-- No child elements are declared; mixed="true" still allows text content. -->
  <xs:sequence/>
  <!-- "type" is a free-form string, so custom action types validate too. -->
  <xs:attribute name="type" type="xs:string" use="required"/>
  <xs:attribute name="selector" type="xs:string"/>
  <!-- NOTE(review): the default here is xpath while FieldType/@mode defaults
       to css; confirm the difference is intended. -->
  <xs:attribute name="mode" type="SelectorMode" default="xpath"/>
  <xs:attribute name="timeout_ms" type="NonNegativeInt"/>
  <xs:attribute name="after_wait" type="NonNegativeInt" default="0"/>
  <!-- Any further attribute is accepted and validated only if a declaration exists. -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>

<!-- Field -->
<xs:complexType name="FieldType">
  <xs:annotation>
    <xs:documentation>Field extraction rule; selector may be CSS or XPath.</xs:documentation>
  </xs:annotation>
  <xs:attribute name="name" type="xs:string" use="required"/>
  <xs:attribute name="selector" type="xs:string" use="required"/>
  <!-- When mode is omitted the selector is treated as css (schema default). -->
  <xs:attribute name="mode" type="SelectorMode" default="css"/>
  <xs:attribute name="value_type" type="xs:string"/>
</xs:complexType>

<!-- Download -->
<xs:complexType name="DownloadType">
  <xs:annotation>
    <xs:documentation>
      Attachment download configuration; all attributes are passed to the
      downloader.
    </xs:documentation>
  </xs:annotation>
  <!-- Empty content model: the element carries attributes only. -->
  <xs:sequence/>
  <!-- No attribute is declared explicitly; every attribute is accepted. -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>

<!-- Extract -->
<xs:complexType name="ExtractType">
  <xs:annotation>
    <xs:documentation>
      Table/list extraction configuration. At least one of record_css or
      record_xpath must be filled in.
    </xs:documentation>
  </xs:annotation>
  <!-- Children must appear in this order: all field elements, then an optional download. -->
  <xs:sequence>
    <xs:element name="field" type="FieldType" minOccurs="0" maxOccurs="unbounded"/>
    <xs:element name="download" type="DownloadType" minOccurs="0"/>
  </xs:sequence>
  <!-- Both attributes are optional here: the "at least one" rule above is not
       enforced by this schema and has to be checked by the consumer. -->
  <xs:attribute name="record_css" type="xs:string"/>
  <xs:attribute name="record_xpath" type="xs:string"/>
</xs:complexType>

<!-- Excel Extract -->
<xs:complexType name="ExcelExtractType">
  <xs:annotation>
    <xs:documentation>
      Excel file extraction configuration; supply either file_pattern or
      pattern.
    </xs:documentation>
  </xs:annotation>
  <!-- Empty content model: the element carries attributes only. -->
  <xs:sequence/>
  <!-- All three attributes are optional here: the either/or rule above is
       not enforced by this schema. -->
  <xs:attribute name="file_pattern" type="xs:string"/>
  <xs:attribute name="pattern" type="xs:string"/>
  <xs:attribute name="directory" type="xs:string"/>
</xs:complexType>

<!-- Pagination -->
<xs:complexType name="PaginateType">
  <xs:annotation>
    <xs:documentation>
      Pagination configuration; an XPath or CSS selector can be specified.
    </xs:documentation>
  </xs:annotation>
  <!-- Empty content model: the element carries attributes only. -->
  <xs:sequence/>
  <xs:attribute name="selector" type="xs:string"/>
  <xs:attribute name="css" type="xs:string"/>
  <xs:attribute name="mode" type="SelectorMode" default="xpath"/>
  <!-- Uses the schema's own NonNegativeInt alias, like the other integer
       attributes; the accepted value space is unchanged. -->
  <xs:attribute name="max_pages" type="NonNegativeInt"/>
</xs:complexType>

<!-- Flow -->
<xs:complexType name="FlowType">
  <xs:annotation>
    <xs:documentation>
      Business flow. Configuring at least extract or excel_extract is
      recommended.
    </xs:documentation>
  </xs:annotation>
  <!-- xs:sequence fixes the child order: action elements first, then the
       optional extract, excel_extract and paginate, each at most once. -->
  <xs:sequence>
    <xs:element name="action" type="ActionType" minOccurs="0" maxOccurs="unbounded"/>
    <xs:element name="extract" type="ExtractType" minOccurs="0"/>
    <xs:element name="excel_extract" type="ExcelExtractType" minOccurs="0"/>
    <xs:element name="paginate" type="PaginateType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string"/>
  <xs:attribute name="entry" type="xs:string"/>
  <xs:attribute name="url" type="xs:string"/>
  <xs:attribute name="data_type" type="xs:string"/>
  <xs:attribute name="unique_keys" type="UniqueKeysMode" default="all"/>
  <xs:attribute name="columns" type="xs:string"/>
  <!-- Any further attribute is accepted and validated only if a declaration exists. -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>

<xs:complexType name="FlowsType">
  <xs:annotation>
    <xs:documentation>
      Container for flow elements; at least one flow is required.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="flow" type="FlowType" minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>

<xs:complexType name="LoginType">
  <xs:annotation>
    <xs:documentation>
      Login flow. Same structure as FlowType, but the extraction steps may be
      omitted.
    </xs:documentation>
  </xs:annotation>
  <!-- Extends FlowType without adding anything; FlowType already declares
       its extraction children as optional. -->
  <xs:complexContent>
    <xs:extension base="FlowType"/>
  </xs:complexContent>
</xs:complexType>

<!-- Root -->
<xs:complexType name="SiteType">
  <xs:annotation>
    <xs:documentation>
      Type of the root site element: optional config, login and flows
      children, in that order, plus a required id attribute.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="config" type="ConfigType" minOccurs="0"/>
    <xs:element name="login" type="LoginType" minOccurs="0"/>
    <xs:element name="flows" type="FlowsType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string" use="required"/>
  <xs:attribute name="base" type="xs:string"/>
  <!-- Any further attribute is accepted and validated only if a declaration exists. -->
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>

<!-- The only global element declaration: documents are rooted at site. -->
<xs:element name="site" type="SiteType"/>

</xs:schema>
|
||||
|
||||
Reference in New Issue
Block a user