我正在尝试从现有评论字段中提取电子邮件地址,并将其放入自己的列中。字符串可能是这样的:"这是一个示例评论,其中包含某个电子邮件地址someemail@domain.org" 或者只是电子邮件本身 "someemail@domain.org"。
我认为最好的做法是找到 '@' 符号的索引,并向两个方向搜索,直到要么到达字符串的末尾,或者遇到一个空格。有人能帮我实现这个功能吗?
我正在尝试从现有评论字段中提取电子邮件地址,并将其放入自己的列中。字符串可能是这样的:"这是一个示例评论,其中包含某个电子邮件地址someemail@domain.org" 或者只是电子邮件本身 "someemail@domain.org"。
我认为最好的做法是找到 '@' 符号的索引,并向两个方向搜索,直到要么到达字符串的末尾,或者遇到一个空格。有人能帮我实现这个功能吗?
我知道wewesthemenace已经回答了这个问题,但他/她的解决方案似乎过于复杂。为什么要将电子邮件地址的左侧和右侧连接在一起呢?我宁愿找到电子邮件地址的开头和结尾,然后使用子字符串返回电子邮件地址,如下所示:
DECLARE @Table TABLE (comment NVARCHAR(50));
INSERT INTO @Table
VALUES ('blah MyEmailAddress@domain.org'), --At the end
('blah MyEmailAddress@domain.org blah blah'), --In the middle
('MyEmailAddress@domain.org blah'), --At the beginning
('no email');
SELECT comment,
CASE
WHEN CHARINDEX('@',comment) = 0 THEN NULL
ELSE SUBSTRING(comment,beginningOfEmail,endOfEmail-beginningOfEmail)
END email
FROM @Table
CROSS APPLY (SELECT CHARINDEX(' ',comment + ' ',CHARINDEX('@',comment))) AS A(endOfEmail)
CROSS APPLY (SELECT DATALENGTH(comment)/2 - CHARINDEX(' ',REVERSE(' ' + comment),CHARINDEX('@',REVERSE(' ' + comment))) + 2) AS B(beginningOfEmail)
结果:
comment email
-------------------------------------------------- --------------------------------------------------
blah MyEmailAddress@domain.org MyEmailAddress@domain.org
blah MyEmailAddress@domain.org blah blah MyEmailAddress@domain.org
MyEmailAddress@domain.org blah MyEmailAddress@domain.org
no email NULL
'@'
。然后获取'@'
左侧和右侧的字符串。然后,你想要反转'@'
左侧的字符串,并找到第一个出现的空格,然后从那里获取SUBSTRING
。然后将其反转以获得原始形式。同样的准则适用于右侧,不需要进行REVERSE
。'some text someemail@domain.org some text'
LEFT
= 'some text someemail'RIGHT
= '@domain.org some text'SUBSTRING
到第一个空格 = 'liameemos'REVERSE
(4) = someemailSUBSTRING
(2) 到第一个空格 = '@domain.org';WITH CteEmail(email) AS(
SELECT 'someemail@domain.org' UNION ALL
SELECT 'some text someemail@domain.org some text' UNION ALL
SELECT 'no email'
)
,CteStrings AS(
SELECT
[Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
[Right] = RIGHT(email, CHARINDEX('@', email, 0) + 1)
FROM CteEmail
WHERE email LIKE '%@%'
)
SELECT *,
REVERSE(
SUBSTRING(Reverse_Left, 0,
CASE
WHEN CHARINDEX(' ', Reverse_Left, 0) = 0 THEN LEN(Reverse_Left) + 1
ELSE CHARINDEX(' ', Reverse_Left, 0)
END
)
)
+
SUBSTRING([Right], 0,
CASE
WHEN CHARINDEX(' ', [Right], 0) = 0 THEN LEN([Right]) + 1
ELSE CHARINDEX(' ', [Right], 0)
END
)
FROM CteStrings
样本数据:
email
----------------------------------------
someemail@domain.org
some text someemail@domain.org some text
no email
结果
---------------------
someemail@domain.org
someemail@domain.org
SUBSTRING
、LEFT
和 RIGHT
函数的内容。 - Felix Pamittan[Right] = RIGHT(email, LEN(email) - CHARINDEX('@', email, 0) + 1)
- andres descalzoStephan的回答非常适用于在每一行中查找单个电子邮件地址。
但是,当尝试获取每行中的多个电子邮件地址时,我遇到了以下错误:
传递给LEFT或SUBSTRING函数的无效长度参数
我使用了DBA Stack Exchange中的这个答案来获取字符串中所有@
的位置。它包括一个返回与字符串中某个模式相等的位置数的表值函数。我还必须修改CROSS APPLY
函数以处理多个电子邮件地址。
我的表格:
DECLARE @Table TABLE (comment VARCHAR(500));
INSERT INTO @Table (comment)
VALUES ('blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com'),
('blah hello.world@domain.org more'),
('no email')
表值函数:
CREATE FUNCTION dbo.fnFindPatternLocation
(
@string NVARCHAR(MAX),
@term NVARCHAR(255)
)
RETURNS TABLE
AS
RETURN
(
SELECT pos = Number - LEN(@term)
FROM (SELECT Number, Item = LTRIM(RTRIM(SUBSTRING(@string, Number,
CHARINDEX(@term, @string + @term, Number) - Number)))
FROM (SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
FROM sys.all_objects) AS n(Number)
WHERE Number > 1 AND Number <= CONVERT(INT, LEN(@string))
AND SUBSTRING(@term + @string, Number, LEN(@term)) = @term
) AS y);
GO
查询:
SELECT comment, pos, SUBSTRING(comment,beginningOfEmail,endOfEmail-beginningOfEmail) AS email
FROM @Table
CROSS APPLY (SELECT pos FROM dbo.fnFindPatternLocation(comment, '@')) AS A(pos)
CROSS APPLY (SELECT CHARINDEX(' ',comment + ' ', pos)) AS B(endOfEmail)
CROSS APPLY (SELECT pos - CHARINDEX(' ', REVERSE(SUBSTRING(comment, 1, pos))) + 2) AS C(beginningOfEmail)
结果:
comment
---------------------------------------------------------------------------------------------------------
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah hello.world@domain.org more
pos email
--- ------------------------------
26 My.EmailAddress@domain.org
64 someemailaddress@domain.com
95 asdf@gmail.com
17 hello.world@domain.org
DECLARE @t TABLE (row_id INT, email VARCHAR(100))
INSERT @t (row_id, email)
VALUES (1, 'drgkls<ivan@gvi.ru>, info@gvi.com, @ dgh507-16-65@'),
(2, 'hjshfkjshfj@kjs.kjsehf herwfjewr@kjsd.com adjfhja@.com u3483dhj@hhb@.dfj'),
(3, 'kjsdghfjs4254.23detygh@jhjdfg.dgb лдоврывплдоо isgfsi@ klsdfksdl@,dd.com')
DECLARE @pat VARCHAR(100) = '%[^a-z0-9@._ ]%';
WITH f AS (
SELECT row_id,
CAST(' ' + email + ' ' AS VARCHAR(102)) email,
SUBSTRING(email, PATINDEX(@pat, email), 1) bad,
PATINDEX(@pat, email) pat
FROM @t
UNION ALL
SELECT row_id,
CAST(REPLACE(email, bad, ' ') AS VARCHAR(102)),
SUBSTRING(REPLACE(email, bad, ' '), PATINDEX(@pat, REPLACE(email, bad, ' ')), 1) bad,
PATINDEX(@pat, REPLACE(email, bad, ' '))
FROM f
WHERE PATINDEX(@pat, email) > 0
),
s AS
(
SELECT row_id,
email, PATINDEX('%@%', email) pos
FROM f
WHERE pat = 0
AND PATINDEX('%@%', email) > 0
UNION ALL
SELECT row_id,
SUBSTRING(email, pos + 1, 102),
PATINDEX('%@%', SUBSTRING(email, pos + 1, 102))
FROM s
WHERE PATINDEX('%@%', SUBSTRING(email, pos + 1, 102)) > 0
)
SELECT row_id, o1 + pp
FROM s
CROSS APPLY (SELECT REVERSE(LEFT(email, pos -1)) s1) x
CROSS APPLY (SELECT CHARINDEX(' ', s1) i1) y
CROSS APPLY (SELECT REVERSE(LEFT(s1, i1 -1)) o1 WHERE i1 > 0) z
CROSS APPLY (SELECT CHARINDEX(' ', email, pos) i2) e
CROSS APPLY (SELECT SUBSTRING(email, pos, i2 -pos) pp WHERE i2 > pos + 1) q
WHERE LEN(o1) > 1
AND CHARINDEX('.', pp) > 0
AND PATINDEX('%@%@%', pp) = 0
AND PATINDEX('%@.%', pp) = 0
AND PATINDEX('%.', pp) = 0
CREATE FUNCTION [dbo].[extractEmail]
(
@input nvarchar(500)
)
RETURNS nvarchar(100)
AS
BEGIN
DECLARE @atPosition int
DECLARE @firstRelevantSpace int
DECLARE @name nvarchar(100)
DECLARE @secondRelelvantSpace int
DECLARE @everythingAfterAt nvarchar(500)
DECLARE @domain nvarchar(100)
DECLARE @email nvarchar(100) = ''
IF CHARINDEX('@', @input,0) > 0
BEGIN
SET @input = ' ' + @input
SET @atPosition = CHARINDEX('@', @input, 0)
SET @firstRelevantSpace = CHARINDEX(' ',REVERSE(LEFT(@input, CHARINDEX('@', @input, 0) - 1)))
SET @name = REVERSE(LEFT(REVERSE(LEFT(@input, @atPosition - 1)),@firstRelevantSpace-1))
SET @everythingAfterAt = SUBSTRING(@input, @atPosition,len(@input)-@atPosition+1)
SET @secondRelelvantSpace = CHARINDEX(' ',@everythingAfterAt)
IF @secondRelelvantSpace = 0
SET @domain = @everythingAfterAt
ELSE
SET @domain = LEFT(@everythingAfterAt, @secondRelelvantSpace)
SET @email = @name + @domain
END
RETURN @email
END
针对包含换行符的字符串,我修改了Felix的答案,使用PATINDEX
搜索第一个控制字符而不是空格。
我还必须修改Right
字段以减去正确数量的文本。
WITH CteEmail(email) AS(
SELECT 'example string with new lines
Email: some.example@email.address.com
(first email address - should be returned)
Email: another@test.co.uk
(other email addresses should be ignored
more example text' UNION ALL
SELECT 'Email: some.example@email.address.com' UNION ALL
SELECT 'someemail@domain.org' UNION ALL
SELECT 'some text someemail@domain.org some text' UNION ALL
SELECT 'no email'
)
,CteStrings AS(
SELECT
[Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
[Right] = RIGHT(email, LEN(email) - CHARINDEX('@', email, 0) + 1 )
FROM CteEmail
WHERE email LIKE '%@%'
)
SELECT *,
REVERSE(
SUBSTRING(Reverse_Left, 0,
CASE
WHEN PATINDEX('%[' + CHAR(10)+'- ]%', Reverse_Left) = 0 THEN LEN(Reverse_Left) + 1
ELSE PATINDEX('%[' + CHAR(0)+'- ]%', Reverse_Left)
END
)
)
+
SUBSTRING([Right], 0,
CASE
WHEN PATINDEX('%[' + CHAR(0)+'- ]%', [Right]) = 0 THEN LEN([Right]) + 1
ELSE PATINDEX('%[' + CHAR(0)+'- ]%', [Right])
END
)
FROM CteStrings
--declare @a varchar(100)
--set @a = 'a asfd saasd asdfgh@asd.com wqe z zx cxzc '
select substring(substring(@a,0,charindex('@',@a)),len(substring(@a,0,charindex('@',@a)))-charindex(' ',reverse(substring(@a,0,charindex('@',@a))))+2,len(substring(@a,0,charindex('@',@a)))) + substring(substring(@a,charindex('@',@a),len(@a)),0,charindex(' ',substring(@a,charindex('@',@a),len(@a))))
select extractEmail(replace(replace(MyColumn,CHAR(10),' '),CHAR(13),' ')) as AsYouWish from FacilityContacts