一、场景案例
- 标签类型数据:如将员工标签类型的数据
- 技能:Java、Spring、微服务、项目管理 - 项目经验:电商、金融、教育
- 工作风格:#执行力强 #跨部门协作 #数据驱动 - 分类层级数据:如将组织部门层级数据:“/集团/分公司/信息技术部/数据团队”
除此之外,在一些特定的业务场景中,如证券场景中,需要将一笔回购合约涉及到的多个质押券进行拆分(提取每个质押券有质押数量及对应标准券转换比例),数据示例如下:

另外,API请求的返回的数据多为半结构化的字典列表数据,需要将每行数据从列表中进行提取出来再进行后续的处理,如以下数据:
{"data": [
{"AMOUNT":-9000,"SUB_TYPE_NAME":"债券分销","BRANCH_NAME":"银行间","TYPE_NAME":"收入","BUSI_DATE":"20250831","PROJECT_NAME":"调整1月分销佣金-24****MTN001"},{"AMOUNT":-1075.61,"SUB_TYPE_NAME":"债券分销","BRANCH_NAME":"银行间","TYPE_NAME":"收入","BUSI_DATE":"20250531","PROJECT_NAME":"调整计提1月及3月分销佣金"}],
"success":true
}
二,各类主流SQL方言实现方法
针对这类场景,目前没有标准SQL函数支持实现该功能。各类SQL方言通过自定义方法进行支持,以下是常用的实现方法。
Spark/Hive SQL


PostgreSQL


select string_to_array('apple,banana,orange', ',') as item; select string_to_array('apple|banana|orange', '|') as item; select unnest(string_to_array('apple,banana,orange', ',')) as item;select regexp_split_to_array('apple|banana|orange', '\|') as item; select regexp_split_to_table('apple|banana|orange', '\|') as item;
Oracle

WITH CTE_DATA AS (SELECT 'tom' AS NAME, 'apple,banana,cherry' AS FRUIT FROM DUAL ) SELECT NAME, REGEXP_SUBSTR(FRUIT, '[^,]+', 1, LEVEL) AS FRUIT FROM CTE_DATA CONNECT BY REGEXP_SUBSTR(FRUIT, '[^,]+', 1, LEVEL) IS NOT NULL ;
MySQL

CREATE TABLE IF NOT EXISTS numbers (n INT PRIMARY KEY); INSERT INTO numbers SELECT a.N + b.N * 10 + c.N * 100 + 1 AS n FROM (SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) a CROSS JOIN (SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) b CROSS JOIN (SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) c; -- 使用数字表分割 SELECT SUBSTRING_INDEX( SUBSTRING_INDEX('a,b,c,d', ',', numbers.n), ',', -1 ) AS part FROM numbers WHERE numbers.n <= LENGTH('a,b,c,d') - LENGTH(REPLACE('a,b,c,d', ',', '')) + 1;
8.0+版本可使用json_table
SELECT o.order_id,j.product_id FROM orders o JOIN JSON_TABLE(CONCAT('["', REPLACE(o.product_ids, ',', '","'), '"]'),'$[*]' COLUMNS (product_id INT PATH '$') ) AS j;
SQL Server
CREATE FUNCTION dbo.SplitString (@String NVARCHAR(MAX),@Delimiter CHAR(1) ) RETURNS @Results TABLE (ID INT IDENTITY(1,1),Value NVARCHAR(MAX) ) AS BEGINDECLARE @pos INT = 0DECLARE @slice NVARCHAR(MAX)IF RIGHT(@String, 1) != @DelimiterSET @String = @String + @DelimiterWHILE CHARINDEX(@Delimiter, @String) > 0BEGINSET @pos = CHARINDEX(@Delimiter, @String)SET @slice = LEFT(@String, @pos - 1)INSERT INTO @Results (Value) VALUES (@slice)SET @String = STUFF(@String, 1, @pos, '')ENDRETURN END-- 使用示例 SELECT * FROM dbo.SplitString('张三,李四,王五', ',')
DECLARE @str NVARCHAR(100) = '苹果|香蕉|橙子'; DECLARE @separator CHAR(1) = '|';SELECT LTRIM(RTRIM(m.n.value('.[1]', 'NVARCHAR(100)'))) AS SplitValue FROM (SELECT CAST('<x>' + REPLACE(@str, @separator, '</x><x>') + '</x>' AS XML)) AS t(x) CROSS APPLY x.nodes('/x') AS m(n);
2016之后的版本可使用原生方法STRING_SPLIT
--带序号的版本(SQL Server 2022+) SELECT value, ordinal FROM STRING_SPLIT('a,b,c', ',', 1) -- 第三个参数启用序号