最新要闻

广告

手机

iphone11大小尺寸是多少?苹果iPhone11和iPhone13的区别是什么?

iphone11大小尺寸是多少?苹果iPhone11和iPhone13的区别是什么?

警方通报辅警执法直播中被撞飞:犯罪嫌疑人已投案

警方通报辅警执法直播中被撞飞:犯罪嫌疑人已投案

家电

Python 基于xml.etree.ElementTree实现XML对比

来源:博客园


(相关资料图)

测试环境

Python 3.6

Win10

代码实现

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Compare two XML documents with xml.etree.ElementTree.

Every comparison function returns a list of human-readable difference
messages; an empty list means no difference was found.
"""

__author__ = "shouke"

import xml.etree.ElementTree as ET

# Placeholder printed where an attribute exists on only one of the two nodes.
_ABSENT = "不存在"
_ATTRIBUTE_DIFF = "结点1属性:{attrib1} 值:{value1},结点2属性:{attrib2} 值:{value2}"


def compare_xml_node_attributes(xml_node1, xml_node2):
    """Compare the attributes of two elements.

    Neither element is modified.

    :param xml_node1: element taken from the first document.
    :param xml_node2: element taken from the second document.
    :return: one message per attribute that is missing on one side or whose
        value differs; attributes of node 1 come first, then attributes that
        exist only on node 2.
    """
    result = []
    attributes1 = xml_node1.attrib
    attributes2 = xml_node2.attrib

    for name, value1 in attributes1.items():
        # Membership test (not truthiness) so that an empty-string value on
        # node 2 is still recognised as "present".
        if name in attributes2:
            value2 = attributes2[name]
            if value1 != value2:
                result.append(_ATTRIBUTE_DIFF.format(
                    attrib1=name, value1=value1,
                    attrib2=name, value2=value2))
        else:
            result.append(_ATTRIBUTE_DIFF.format(
                attrib1=name, value1=value1,
                attrib2=_ABSENT, value2=_ABSENT))

    for name, value2 in attributes2.items():
        if name not in attributes1:
            result.append(_ATTRIBUTE_DIFF.format(
                attrib1=_ABSENT, value1=_ABSENT,
                attrib2=name, value2=value2))
    return result


def _group_children_by_tag(xml_node, node_xpath):
    """Map each child tag to its children, in document order, with their xpath."""
    grouped = {}
    for child in xml_node:
        siblings = grouped.setdefault(child.tag, [])
        # XPath positions are 1-based and counted per tag.
        xpath = "%s/%s[%s]" % (node_xpath, child.tag, len(siblings) + 1)
        siblings.append({"node": child, "xpath": xpath})
    return grouped


def compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath):
    """Compare the direct children of two elements, recursing into matched pairs.

    Children are paired by tag and by position among same-tag siblings
    (the n-th <x> of node 1 is compared with the n-th <x> of node 2).

    :param xml_node1: element taken from the first document.
    :param xml_node2: element taken from the second document.
    :param node1_xpath: xpath of ``xml_node1``, used in messages.
    :param node2_xpath: xpath of ``xml_node2``, used in messages.
    :return: differences found inside matched children, followed by at most
        one message listing children only in xml1 and at most one message
        listing children only in xml2.
    """
    result = []
    children1 = _group_children_by_tag(xml_node1, node1_xpath)
    children2 = _group_children_by_tag(xml_node2, node2_xpath)
    only_in_xml1 = []
    only_in_xml2 = []

    for tag, entries1 in children1.items():
        entries2 = children2.get(tag, [])
        for first, second in zip(entries1, entries2):
            result.extend(compare_xml_nodes(first["node"], second["node"],
                                            first["xpath"], second["xpath"]))
        # zip() stops at the shorter list; whatever is left over on either
        # side has no counterpart and must be reported.
        only_in_xml1.extend(entry["xpath"] for entry in entries1[len(entries2):])
        only_in_xml2.extend(entry["xpath"] for entry in entries2[len(entries1):])

    # Tags that never occur under node 1.
    for tag, entries2 in children2.items():
        if tag not in children1:
            only_in_xml2.extend(entry["xpath"] for entry in entries2)

    if only_in_xml1:
        result.append(
            "子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})多了以下子结点:\n{differences}".format(
                xpath1=node1_xpath,
                xpath2=node2_xpath,
                differences="\n".join(only_in_xml1)))
    if only_in_xml2:
        result.append(
            "子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})少了以下子结点:\n{differences}".format(
                xpath1=node1_xpath,
                xpath2=node2_xpath,
                differences="\n".join(only_in_xml2)))
    return result


def compare_xml_nodes(xml_node1, xml_node2, node1_xpath="", node2_xpath=""):
    """Compare two elements: tag, text, attributes, then children (recursively).

    :param xml_node1: element taken from the first document.
    :param xml_node2: element taken from the second document.
    :param node1_xpath: xpath of ``xml_node1``, used in messages.
    :param node2_xpath: xpath of ``xml_node2``, used in messages.
    :return: list of difference messages, empty when nothing differs.
    """
    result = []

    # Tag
    if xml_node1.tag != xml_node2.tag:
        result.append(
            "标签不一样:xml1结点(xpath:{xpath1}):{tag1},xml2结点(xpath:{xpath2}):{tag2}".format(
                xpath1=node1_xpath,
                tag1=xml_node1.tag,
                xpath2=node2_xpath,
                tag2=xml_node2.tag))

    # Text (compared verbatim, whitespace included; None is printed as "")
    if xml_node1.text != xml_node2.text:
        result.append(
            "文本不一样:xml1结点(xpath:{xpath1}):{text1},xml2结点(xpath:{xpath2}):{text2}".format(
                xpath1=node1_xpath,
                text1=xml_node1.text or "",
                xpath2=node2_xpath,
                text2=xml_node2.text or ""))

    # Attributes
    attribute_differences = compare_xml_node_attributes(xml_node1, xml_node2)
    if attribute_differences:
        result.append(
            "属性不一样:xml1结点(xpath:{xpath1}),xml2结点(xpath:{xpath2}):\n{differences}".format(
                xpath1=node1_xpath,
                xpath2=node2_xpath,
                differences="\n".join(attribute_differences)))

    # Children
    result.extend(compare_xml_node_children(xml_node1, xml_node2,
                                            node1_xpath, node2_xpath))
    return result


def compare_xml_strs(xml1_str, xml2_str, mode=3):
    """Parse two XML strings and compare them from the root element down.

    :param xml1_str: first XML document, as a string.
    :param xml2_str: second XML document, as a string.
    :param mode: comparison mode; reserved and currently unused. Repeated
        child elements are treated as an ordered list and compared in order.
    :return: list of difference messages, empty when the documents match.
    """
    root1 = ET.fromstring(xml1_str.strip())
    root2 = ET.fromstring(xml2_str.strip())
    return compare_xml_nodes(root1, root2, "/%s" % root1.tag, "/%s" % root2.tag)

测试运行

# Demo inputs for compare_xml_strs.
# NOTE(review): these literals contain no XML markup, only text values and
# whitespace - the tags appear to have been stripped when the page was
# extracted. Restore the original documents before running; TODO confirm
# against the upstream article.
xml_str1 = """            1        2008        141100                                4        2011        59900                        68        2011        13600                    """

xml_str2 = """            1        2008        141100                                4        2011        59900                        68        2011        13600                    """

xml_str3 = """            1        unknow        sz                                2        unknown                                3        unknown        other addr                    """

xml_str4 = """            unknow        sz                                unknown        other addr                    """

if __name__ == "__main__":
    # First pair: print the differences, or a "same" message when there are none.
    differences = compare_xml_strs(xml_str1, xml_str2)
    if not differences:
        print("xml1和xml2一样")
    else:
        print("xml1和xml2不一样:\n%s" % "\n".join(differences))

    # Second pair, reported the same way.
    differences = compare_xml_strs(xml_str3, xml_str4)
    if not differences:
        print("xml3和xml4一样")
    else:
        print("xml3和xml4不一样:\n%s" % "\n".join(differences))

运行结果

xml1和xml2一样
xml3和xml4不一样:
子结点不一样:xml1结点(xpath:/data/class[1])比xml2结点(xpath:/data/class[1])多了以下子结点:
/data/class[1]/rangk[1]
属性不一样:xml1结点(xpath:/data/class[2]/book[2]),xml2结点(xpath:/data/class[2]/book[2]):
结点1属性:price 值:15,结点2属性:price 值:16
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])多了以下子结点:
/data/class[2]/rangk[1]
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])少了以下子结点:
/data/class[2]/addr[1]

关键词: 测试环境 测试运行