Files
xspider/schema/xspider.xsd
Flik f8370eb85e feat(variables):重构变量服务为 Redis 实现并增强作用域支持
- 将变量服务从 HTTP 接口迁移至 Redis 存储,提升性能与可靠性
- 支持显式作用域前缀:site:xxx 和 global:xxx
- 实现变量 TTL 与一次性读取功能(var_ttl、var_single_use)
- 新增 VariableScope 枚举与 VariableTarget 缓存键设计
- 改进 VariableResolver 缓存机制以兼容作用域隔离
- 更新 README 文档说明新变量语法与使用示例
- 移除 settings 中已弃用的 variable_service_url 配置项
- 调整 ActionRegistry 自动注册逻辑以适配模块化扫描
- 统一浏览器选择器模式分隔符由 '=' 改为 ':'
- 优化浏览器元素等待与属性设置的容错处理逻辑
2025-10-20 21:47:58 +08:00

199 lines
7.1 KiB
XML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
<xs:annotation>
<xs:documentation>
xspider XML 模板 Schema,用于在编辑器中提示必填项、枚举值与作用域规则。
适用于描述站点配置、流程、动作与抽取字段。
</xs:documentation>
</xs:annotation>
<!-- Basic types -->
<!--
  BooleanFlag: a string-typed boolean switch.
  Exactly eight literals validate: true/false, 1/0, yes/no, on/off.
  The base type is xs:string, so matching is case-sensitive and whitespace
  is preserved: "True" or " yes" do not validate.
-->
<xs:simpleType name="BooleanFlag">
  <xs:annotation>
    <xs:documentation>
      支持 true/false, 1/0, yes/no, on/off。
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <!-- canonical boolean spellings -->
    <xs:enumeration value="true"/>
    <xs:enumeration value="false"/>
    <!-- numeric spellings -->
    <xs:enumeration value="1"/>
    <xs:enumeration value="0"/>
    <!-- word spellings -->
    <xs:enumeration value="yes"/>
    <xs:enumeration value="no"/>
    <xs:enumeration value="on"/>
    <xs:enumeration value="off"/>
  </xs:restriction>
</xs:simpleType>
<!--
  SelectorMode: names the selector syntax; only "css" and "xpath" validate.
  Referenced by the "mode" attribute of ActionType, FieldType and PaginateType.
-->
<xs:simpleType name="SelectorMode">
  <xs:restriction base="xs:string">
    <xs:enumeration value="css"/>
    <xs:enumeration value="xpath"/>
  </xs:restriction>
</xs:simpleType>
<!--
  UniqueKeysMode: value set for the "unique_keys" attribute of FlowType.
  Allowed literals are "all", "custom" and the four-character string "null"
  (a plain enumeration value, not xsi:nil).
  NOTE(review): the meaning of each mode is defined by the consuming
  application, not by this schema; confirm against the parser.
-->
<xs:simpleType name="UniqueKeysMode">
  <xs:restriction base="xs:string">
    <xs:enumeration value="all"/>
    <xs:enumeration value="custom"/>
    <xs:enumeration value="null"/>
  </xs:restriction>
</xs:simpleType>
<!--
  NonNegativeInt: project-local alias for xs:nonNegativeInteger.
  The restriction adds no facets, so the value space is unchanged.
-->
<xs:simpleType name="NonNegativeInt">
  <xs:restriction base="xs:nonNegativeInteger">
  </xs:restriction>
</xs:simpleType>
<!-- Header -->
<!--
  HeaderType: one header entry expressed purely through attributes.
    name   required string
    value  optional string (attributes are optional unless marked otherwise)
  The type declares no child elements.
-->
<xs:complexType name="HeaderType">
  <xs:annotation>
    <xs:documentation>HTTP 请求头设置</xs:documentation>
  </xs:annotation>
  <xs:attribute name="name" type="xs:string" use="required"/>
  <xs:attribute name="value" type="xs:string"/>
</xs:complexType>
<!-- Config -->
<!--
  ConfigType: type of the optional <config> child of <site>.
  Content: zero or more <header> elements (HeaderType).
  Attributes (all optional):
    enable_proxy  BooleanFlag, default "false"
    rotate_ua     BooleanFlag, default "false"
    retry         NonNegativeInt, default "3"
  "retry" uses the schema's own NonNegativeInt alias, like the integer
  attributes of ActionType; the accepted values are the same as
  xs:nonNegativeInteger.
-->
<xs:complexType name="ConfigType">
  <xs:sequence>
    <xs:element name="header" type="HeaderType"
                minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="enable_proxy" type="BooleanFlag" default="false"/>
  <xs:attribute name="rotate_ua" type="BooleanFlag" default="false"/>
  <xs:attribute name="retry" type="NonNegativeInt" default="3"/>
</xs:complexType>
<!-- Action -->
<!--
  ActionType: a single step inside a flow.
  Content model: mixed="true" with an empty sequence, so the element may
  carry character data but no child elements.
  Attributes:
    type        required free-form string (not an enumeration, so custom
                action types validate)
    selector    optional string
    mode        SelectorMode, default "xpath"
    timeout_ms  optional NonNegativeInt
    after_wait  NonNegativeInt, default "0"
  Any further attribute is accepted through the lax wildcard.
-->
<xs:complexType name="ActionType" mixed="true">
  <xs:annotation>
    <xs:documentation>
      动作执行配置。内置类型包含:goto、click、type、wait_dom_show、
      wait_dom_gone、wait_dom_hide、wait_time、run_js、set_header、set_attr、
      set_var、captcha。可按需扩展自定义类型。
    </xs:documentation>
  </xs:annotation>
  <xs:sequence/>
  <xs:attribute name="type" type="xs:string" use="required"/>
  <xs:attribute name="selector" type="xs:string"/>
  <xs:attribute name="mode" type="SelectorMode" default="xpath"/>
  <xs:attribute name="timeout_ms" type="NonNegativeInt"/>
  <xs:attribute name="after_wait" type="NonNegativeInt" default="0"/>
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<!-- Field -->
<!--
  FieldType: one extracted field, described by attributes only.
    name        required string
    selector    required string
    mode        SelectorMode, default "css"
    value_type  optional free-form string
  NOTE(review): "mode" defaults to "css" here, while ActionType and
  PaginateType default to "xpath"; confirm the difference is intended.
-->
<xs:complexType name="FieldType">
  <xs:annotation>
    <xs:documentation>字段抽取规则,selector 可为 CSS 或 XPath。</xs:documentation>
  </xs:annotation>
  <xs:attribute name="name" type="xs:string" use="required"/>
  <xs:attribute name="selector" type="xs:string" use="required"/>
  <xs:attribute name="mode" type="SelectorMode" default="css"/>
  <xs:attribute name="value_type" type="xs:string"/>
</xs:complexType>
<!-- Download -->
<!--
  DownloadType: open-ended attribute bag.
  No attribute is declared by name; the lax wildcard accepts any attribute.
  The empty sequence means no child elements are allowed.
-->
<xs:complexType name="DownloadType">
  <xs:annotation>
    <xs:documentation>下载附件配置,将所有属性传入下载器。</xs:documentation>
  </xs:annotation>
  <xs:sequence/>
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<!-- Extract -->
<!--
  ExtractType: record extraction rules.
  Children, in this order: any number of <field> (FieldType), then at most
  one <download> (DownloadType).
  Attributes: record_css and record_xpath, both optional strings.
  The "at least one of record_css / record_xpath" rule stated in the
  documentation below is not enforced by this schema.
-->
<xs:complexType name="ExtractType">
  <xs:annotation>
    <xs:documentation>
      表格/列表抽取配置。record_css 或 record_xpath 至少填写一个。
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="field" type="FieldType"
                minOccurs="0" maxOccurs="unbounded"/>
    <xs:element name="download" type="DownloadType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="record_css" type="xs:string"/>
  <xs:attribute name="record_xpath" type="xs:string"/>
</xs:complexType>
<!-- Excel Extract -->
<!--
  ExcelExtractType: attribute-only element with no child elements.
  Attributes: file_pattern, pattern and directory, all optional strings.
  The "file_pattern or pattern, pick one" rule stated in the documentation
  below is not enforced by this schema; both may be present or absent.
-->
<xs:complexType name="ExcelExtractType">
  <xs:annotation>
    <xs:documentation>Excel 文件抽取配置,file_pattern 与 pattern 二选一。</xs:documentation>
  </xs:annotation>
  <xs:sequence/>
  <xs:attribute name="file_pattern" type="xs:string"/>
  <xs:attribute name="pattern" type="xs:string"/>
  <xs:attribute name="directory" type="xs:string"/>
</xs:complexType>
<!-- Pagination -->
<!--
  PaginateType: attribute-only element with no child elements.
    selector   optional string
    css        optional string
    mode       SelectorMode, default "xpath"
    max_pages  optional NonNegativeInt
  "max_pages" uses the schema's own NonNegativeInt alias, like the integer
  attributes of ActionType; the accepted values are the same as
  xs:nonNegativeInteger.
  NOTE(review): how "selector", "css" and "mode" interact is decided by the
  consuming application; confirm against the parser.
-->
<xs:complexType name="PaginateType">
  <xs:annotation>
    <xs:documentation>分页配置,可指定 XPath 或 CSS 选择器。</xs:documentation>
  </xs:annotation>
  <xs:sequence/>
  <xs:attribute name="selector" type="xs:string"/>
  <xs:attribute name="css" type="xs:string"/>
  <xs:attribute name="mode" type="SelectorMode" default="xpath"/>
  <xs:attribute name="max_pages" type="NonNegativeInt"/>
</xs:complexType>
<!-- Flow -->
<!--
  FlowType: one flow definition; also the base type of LoginType.
  Children must appear in this order (xs:sequence):
    <action>         0..n  ActionType
    <extract>        0..1  ExtractType
    <excel_extract>  0..1  ExcelExtractType
    <paginate>       0..1  PaginateType
  Every child is optional, so the advice in the documentation below to
  configure extract or excel_extract is not enforced by this schema.
  Attributes: id, entry, url, data_type and columns are optional strings;
  unique_keys is a UniqueKeysMode with default "all".
  Any further attribute is accepted through the lax wildcard.
-->
<xs:complexType name="FlowType">
  <xs:annotation>
    <xs:documentation>
      业务流程。建议至少配置 extract 或 excel_extract。
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="action" type="ActionType"
                minOccurs="0" maxOccurs="unbounded"/>
    <xs:element name="extract" type="ExtractType" minOccurs="0"/>
    <xs:element name="excel_extract" type="ExcelExtractType" minOccurs="0"/>
    <xs:element name="paginate" type="PaginateType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string"/>
  <xs:attribute name="entry" type="xs:string"/>
  <xs:attribute name="url" type="xs:string"/>
  <xs:attribute name="data_type" type="xs:string"/>
  <xs:attribute name="unique_keys" type="UniqueKeysMode" default="all"/>
  <xs:attribute name="columns" type="xs:string"/>
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<!--
  FlowsType: wrapper element holding one or more <flow> children (FlowType).
  It declares no attributes; an empty wrapper does not validate.
-->
<xs:complexType name="FlowsType">
  <xs:sequence>
    <xs:element name="flow" type="FlowType"
                minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
<!--
  LoginType: derived from FlowType by an empty extension, so it inherits the
  same children and attributes and adds nothing of its own.
-->
<xs:complexType name="LoginType">
  <xs:annotation>
    <xs:documentation>登录流程,结构与 FlowType 相同但允许缺少抽取步骤。</xs:documentation>
  </xs:annotation>
  <xs:complexContent>
    <xs:extension base="FlowType"/>
  </xs:complexContent>
</xs:complexType>
<!-- Root -->
<!--
  SiteType: type of the document root element <site>.
  Children, each optional and in this order:
    <config>  ConfigType
    <login>   LoginType
    <flows>   FlowsType
  Attributes: id is a required string, base is an optional string.
  Any further attribute is accepted through the lax wildcard.
-->
<xs:complexType name="SiteType">
  <xs:sequence>
    <xs:element name="config" type="ConfigType" minOccurs="0"/>
    <xs:element name="login" type="LoginType" minOccurs="0"/>
    <xs:element name="flows" type="FlowsType" minOccurs="0"/>
  </xs:sequence>
  <xs:attribute name="id" type="xs:string" use="required"/>
  <xs:attribute name="base" type="xs:string"/>
  <xs:anyAttribute processContents="lax"/>
</xs:complexType>
<!-- The only global element declaration in this schema: <site>, typed by SiteType. -->
<xs:element name="site"
            type="SiteType"/>
</xs:schema>