原创

数据清理

select * from isc_user where namecode like '%qy.gd.csg.cn%'
select * from tmp_yx_user_0401
isc_user t.NAMECODE
hr_user_qy0331_ok LOGIN_NAME is not null /*5334*/

select * from tmp_yx_user_0401_qy_login_name;
select * from hr_user_qy0331_ok;

CREATE TABLE tmp_yx_user_qy_0403 as (
select y.*,h.员工id,h.ou AS HROU,h.员工所属部门名称,h.从身份证号码截取出生日期,h.deptid,h.login_name AS lOGINENAME,h.Isc_4aid,H.USER_ID from tmp_yx_user_0401_qy_login_name y
join  hr_user_qy0331_ok h
on y.login_name = h.login_name or y.姓名 = h.中文全名)

select * from tmp_yx_user_qy_0403 where 用户状态 = 1 and loginename in (
select q.loginename from tmp_yx_user_qy_0403 q group by q.loginename having count(1)>1 )

/*处理完全重复rowid为小的记录,增加一个mark字段简化操作*/
update tmp_yx_user_qy_0403 set mark = '用户ID重复'
/*select * from tmp_yx_user_qy_0403 */
 where 用户ID in (select 用户ID  from tmp_yx_user_qy_0403 group by 用户ID having count(用户ID) > 1)
 
update tmp_yx_user_qy_0403 set mark = 'delete'
/*select * from tmp_yx_user_qy_0403 */
 where 用户ID in (select 用户ID  from tmp_yx_user_qy_0403 group by 用户ID having count(用户ID) > 1)
 and 用户状态 ='0' and rowid not in
 (select min(rowid) from tmp_yx_user_qy_0403 group by 用户ID having count(用户ID) > 1)
 
update tmp_yx_user_qy_0403 set loginename = '' where 姓名 in (
 select 姓名 from tmp_yx_user_qy_0403
 where 用户ID in (select 用户ID  from tmp_yx_user_qy_0403 group by 用户ID having count(用户ID) > 1))
 
 select 用户ID,姓名,用户状态,MARK from tmp_yx_user_qy_0403 where loginename is null for update
 
 delete tmp_yx_user_qy_0403 where mark = 'delete'
 
  select 用户ID,姓名,用户状态,MARK from tmp_yx_user_qy_0403
 where 用户ID in (select 用户ID  from tmp_yx_user_qy_0403 group by 用户ID having count(用户ID) > 1)
 
update tmp_yx_user_qy_0403 set loginename = fn_getpy(姓名)||'@'||'qy.gd.csg.cn'
 

/*处理重复rowid为小的记录,增加一个mark字段简化操作*/
update tmp_yx_user_qy_0403 set mark = 'same'
/*select * from tmp_yx_user_qy_0403 */
 where loginename in (select loginename  from tmp_yx_user_qy_0403 group by loginename having count(loginename) > 1)
 and rowid not in
 (select min(rowid) from tmp_yx_user_qy_0403 group by loginename having count(loginename) > 1)
 
/*问题数据:状态为0或用户名重复数据 1087*/
select * from tmp_yx_user_qy_0403 where mark is null and 用户状态 = '0' and
select * from tmp_yx_user_qy_0403 where mark is not null and 用户状态 = '1'
select count(*) from tmp_yx_user_qy_0403 where isc4aid is null;
select count(*) from tmp_yx_user_qy_0403 where isc_4aid is null;

/*完全正确数据*/
select * from tmp_yx_user_qy_0403 where 用户ID not in (
 select 用户id from tmp_yx_user_qy_0403 where isc_4aID in (
 select isc_4aid from tmp_yx_user_qy_0403 group by isc_4aid having count(isc_4aid) > 1))
 
 
 
/* 处理mark=1的重复登录名*/
select * from tmp_yx_user_qy_0403 where mark = '1' for update;
update tmp_yx_user_qy_0403 l set l.loginename = substr(l.loginename,1,instr(l.loginename, '@') - 1) ||
                          to_char(to_date(从身份证号码截取出生日期, 'yyyy/mm/dd'), 'mmdd') ||
                          substr(l.loginename, instr(l.loginename, '@'))
                          where l.mark = '1'
                          
/*再次更新login_name重复mark 为 2*/
update tmp_yx_user_qy_0403 set mark = '2'
/*select * from tmp_yx_user_qy_0403 */
 where loginename in (select loginename  from tmp_yx_user_qy_0403 group by loginename having count(loginename) > 1)
 and rowid not in
 (select min(rowid) from tmp_yx_user_qy_0403 group by loginename having count(loginename) > 1)

 /* 处理mark=0001的重复登录名*/
select * from tmp_yx_user_qy_0403 where mark = '2' ;
update tmp_yx_user_qy_0403 t
set t.loginename = fn_getpy(姓名)||'0001'||'@'||'qy.gd.csg.cn'
where mark = '2'

select fn_getpy(姓名)||'0001'||'@'||'qy.gd.csg.cn' from tmp_yx_user_qy_0403 where mark = '2' ;

/*最后第三次查询login_name重复的记录*/
select 用户ID,姓名,基准组织,用户状态,loginename,mark,从身份证号码截取出生日期 from tmp_yx_user_qy_0403
where 用户状态 = '1' and 姓名 in (
select 姓名 from tmp_yx_user_qy_0403 where 姓名 in (
select 姓名 from tmp_yx_user_qy_0403 where loginename in
(select o.loginename from tmp_yx_user_qy_0403 o group by o.loginename having count(1) >1)))

select 用户ID,姓名,基准组织,loginename,mark from tmp_yx_user_qy_0403 w where w.mark is null

正文到此结束
本文目录