python处理excle单元格内符号分割姓名匹配工号

Lengxiy · 发表于 2024-3-11 14:01

本帖最后由 Lengxiy 于 2024-3-11 14:10 编辑

我知道excle里面有分列这个功能，不过原数据里面有用tab打出来的符号空格在.....还有一些奇奇怪怪我都没见过的符号，后面还是用一部分去除奇怪符号的代码搞定的，因为还要排除新疆同学的姓名连接符号：“·”单纯整理数据的时候不知道excle怎么操作，就编写了个屎山代码凑合着用
需要处理的数据样例(姓名匹配工号/学号，并且整理后的数据由","分割)，仅展示部分，无敏感信息，省略1000+条数据：

被查询数据(仅展示部分，无敏感信息)：

本来想着用vlookup函数的，结果试了一下，一个单元格内的多个数据无法查询，更别提里面的数据乱成啥样了....
下面是代码，写的垃圾不过刚好够我用.....

[Python] 纯文本查看 复制代码

import pandas as pd

data = pd.read_excel("./整理后结果_获奖结果.xlsx", sheet_name="Sheet2", na_values=['', 0])
#需要被查询的列
stu_names = data["参赛学生姓名"].tolist()
tea_names = data["指导教师"].tolist()
# print(tea_names)

#学生
search_data_stu = pd.read_excel("./原素材/data.xlsx", sheet_name="SJCJ_JW_XSJBXXB-1", na_values=['', 0])
search_stu_name = search_data_stu["姓名"].tolist()#姓名
search_stu_score = search_data_stu["学号"].tolist()#学号

#老师
search_data_tea = pd.read_excel("./原素材/data.xlsx", sheet_name="SJCJ_RS_JSJBXX-1", na_values=['', 0])
search_tea_name = search_data_tea["姓名"].tolist()
search_tea_scor = search_data_tea["工号"].tolist()

#查询对应的教师/学生的工号/学号位置
def find_position(search_string, my_list):
    try:
        position = my_list.index(search_string)
        return position
    except ValueError:
        #数据匹配不到的都设置成：无
        return f"无"

#对教师工号进行补位操作
def buwei(s_int):
    l = len(s_int)
    if l<7:
        while l<7:
            s_int = "0"+s_int
            l = l +1
        return s_int
    return s_int

name_hebing = []#姓名用逗号合并
score_hebing = []#学号用逗号合并

tea_g = "" #教师工号
tea_n = "" #教师姓名
for tea in tea_names:
    # print(tea)
    if pd.notna(tea):
        c = len(tea)
        x = 0   
        y = 0  #前一个字符
        s = ""
        if c == 0:
            continue
        while x<=c:

            if x == c:
                tea_n = tea_n + tea[y:x]
                s = tea[y:x]
                position = find_position(s,search_tea_name)
                if position == "无":
                    tea_g = tea_g + position
                    # n = n + position
                    x = x + 1
                    continue

                tea_g = tea_g + buwei(str(search_tea_scor[position]))

            elif tea[x] == "、":
                s = tea[y:x]
                tea_n = tea_n + tea[y:x]+","
                y = x + 1
                position = find_position(s,search_tea_name)
                if position == "无":
                    tea_g = tea_g + position + ","
                    # n = n + position + ","
                    x = x + 1
                    continue
                tea_g = tea_g + buwei(str(search_tea_scor[position])) + ","

            elif tea[x] == "，":
                s = tea[y:x]
                tea_n = tea_n + tea[y:x]+","
                y = x + 1
                position = find_position(s,search_tea_name)
                if position == "无":
                    tea_g = tea_g + position + ","
                    # n = n + position + ","
                    x = x + 1
                    continue
                tea_g = tea_g + buwei(str(search_tea_scor[position])) + ","

            elif tea[x] == ",":
                s = tea[y:x]
                tea_n = tea_n + tea[y:x]+","
                y = x + 1
                position = find_position(s,search_tea_name)
                if position == "无":
                    tea_g = tea_g + position + ","
                    # n = n + position + ","
                    x = x + 1
                    continue
                tea_g = tea_g + buwei(str(search_tea_scor[position])) + ","

            elif tea[x] == " ":
                pass
            
            s = ""
            x = x+1

        name_hebing.append(tea_n)
        score_hebing.append(tea_g)

        tea_n = ""
        tea_g = ""
    else:
        name_hebing.append("无")
        score_hebing.append("无")

s_name_hebing = []
s_score_hebing = []
stu_g = "" #学生学号
stu_n = "" #学生姓名
for stu in stu_names:
    # print(tea)
    if pd.notna(stu):
        c = len(stu)
        x = 0   
        y = 0  #前一个字符
        s = ""
        if c == 0:
            continue
        while x<=c:

            if x == c:
                stu_n = stu_n + stu[y:x]
                s = stu[y:x]
                position = find_position(s,search_stu_name)
                if position == "无":
                    stu_g = stu_g + position
                    # n = n + position
                    x = x + 1
                    continue

                stu_g = stu_g + buwei(str(search_stu_score[position]))

            elif stu[x] == "、":
                s = stu[y:x]
                stu_n = stu_n + stu[y:x]+","
                y = x + 1
                position = find_position(s,search_stu_name)
                if position == "无":
                    stu_g = stu_g + position + ","
                    # n = n + position + ","
                    x = x + 1
                    continue
                stu_g = stu_g + buwei(str(search_stu_score[position])) + ","

            elif stu[x] == "，":
                s = stu[y:x]
                stu_n = stu_n + stu[y:x]+","
                y = x + 1
                position = find_position(s,search_stu_name)
                if position == "无":
                    stu_g = stu_g + position + ","
                    # n = n + position + ","
                    x = x + 1
                    continue
                stu_g = stu_g + buwei(str(search_stu_score[position])) + ","

            elif stu[x] == ",":
                s = stu[y:x]
                stu_n = stu_n + stu[y:x]+","
                y = x + 1
                position = find_position(s,search_stu_name)
                if position == "无":
                    stu_g = stu_g + position + ","
                    # n = n + position + ","
                    x = x + 1
                    continue
                stu_g = stu_g + buwei(str(search_stu_score[position])) + ","

            s = ""
            x = x+1

        s_name_hebing.append(stu_n)
        s_score_hebing.append(stu_g)

        stu_n = ""
        stu_g = ""
    else:
        s_name_hebing.append("无")
        s_score_hebing.append("无")

result_df = pd.DataFrame({
    '教师姓名': name_hebing,
    '教师工号': score_hebing,
    '学生姓名': s_name_hebing,
    '学生学号': s_score_hebing
})

# 将数据框写入新的Excel文件
print("完成")
result_df.to_excel('output_result.xlsx', index=False)

Lengxiy · 发表于 2024-3-12 09:09

调味包发表于 2024-3-11 20:36
一分钟搞定的事情硬生生让你写代码写了半小时

说得好.....

kj1004 · 发表于 2024-3-15 10:42

解决问题的同时锻炼自己值得，虽然过程艰辛

。过去有一次遇到单元格里有奇怪的东西时候，我曾尝试把表格弄到word里清除了，还比较方便，具体过程我记不住了，很久之前的事情了，希望对你有个参考，跳出这个软件，有些问题会容易解决

wangarc · 发表于 2024-3-11 14:03

excel自带数据分列功能

Lengxiy · 发表于 2024-3-11 14:07

wangarc 发表于 2024-3-11 14:03
excel自带数据分列功能

我知道，但是领导不希望数据分列，虽然我也想过用分列的方法符号为间隔展开数据，然而低估了数据的麻烦程度。主要是最后还要合并起来我也嫌麻烦

caojian1624 · 发表于 2024-3-11 14:35

谢谢分享

wuuuluojun · 发表于 2024-3-11 14:39

谢谢分享

TWYX · 发表于 2024-3-11 16:34

能跑起来实现需求就是好代码

jiajia112233 · 发表于 2024-3-11 16:49

是Excel 不是excle

调味包 · 发表于 2024-3-11 20:36

一分钟搞定的事情硬生生让你写代码写了半小时

gison · 发表于 2024-3-12 08:42

谢谢楼主, 我正好可以用到

帐号		自动登录	找回密码
密码			注册[Register]

[Python 原创] python处理excle单元格内符号分割姓名匹配工号

免费评分