zl程序教程

您现在的位置是:首页 >  后端

当前栏目

使用python实现MySQL表结构比对【初级版】

Pythonmysql 实现 使用 结构 初级
2023-06-13 09:17:28 时间

使用Python写了个最基本的表结构比对告警脚本, 目前这个版本只能做到发现表结构存在差异的表。后续考虑再迭代下,看如何把差异的表和列的明细也报出来。

configs.py 内容如下:

# MySQL数据源的信息
mysql_source_host = "localhost"
mysql_source_port = "3306"
mysql_source_user = "dts"
mysql_source_pass = "dts"
mysql_source_db = "src_db"

# MySQL目标库的信息
mysql_dest_host = "localhost"
mysql_dest_port = "3306"
mysql_dest_user = "dts"
mysql_dest_pass = "dts"
mysql_dest_db = "dest_db"

main.py 内容如下:

# pip3 install mysql-connector-python==8.0.31

import mysql.connector
import time
import configs
import hashlib

start_time = time.time()

# 源端
source_db = mysql.connector.connect(
    host=configs.mysql_source_host,
    port=configs.mysql_source_port,
    user=configs.mysql_source_user,
    passwd=configs.mysql_source_pass,
)
source_cursor = source_db.cursor()

# 目标端
dest_db = mysql.connector.connect(
    host=configs.mysql_dest_host,
    port=configs.mysql_dest_port,
    user=configs.mysql_dest_user,
    passwd=configs.mysql_dest_pass,
)
dest_cursor = dest_db.cursor()


# 1 获取源端的表清单
get_src_tb_list = (
        "SELECT table_name FROM information_schema.tables WHERE table_schema="
        + "'" + configs.mysql_source_db + "'"
)
source_cursor.execute(get_src_tb_list)
src_tb_result = source_cursor.fetchall()

src_tb_set = set()
for x in src_tb_result:
    tb_name = x[0]
    src_tb_set.add(tb_name)
print(f"源端的表明细:", src_tb_set)

# 2 获取目标端的表清单
get_dest_tb_list = (
        "SELECT table_name FROM information_schema.tables WHERE table_schema="
        + "'" + configs.mysql_dest_db + "'"
)
dest_cursor.execute(get_dest_tb_list)
dest_tb_result = dest_cursor.fetchall()

dest_tb_set = set()
for x in dest_tb_result:
    tb_name = x[0]
    dest_tb_set.add(tb_name)
print(f"目标端的表明细:", dest_tb_set)

print("----------- 计算源端和目标端的差集 --------------------")
print(f"dest比src多的表:", list(dest_tb_set - src_tb_set))
print(f"src比dest多的表:", list(src_tb_set - dest_tb_set))


# print("------------ 开始比较每个表的每个列信息 -------------------")

base_sql = '''
SELECT
 table_name,
  column_name,
 CASE WHEN `COLUMN_DEFAULT` IS NULL THEN '' ELSE `COLUMN_DEFAULT` END AS `COLUMN_DEFAULT` ,
  IS_NULLABLE,
  COLUMN_TYPE,
  COLUMN_KEY,
  EXTRA
FROM
  information_schema.columns where table_schema = 
'''

source_chksum = dict()
dest_chksum = dict()

# 3 采集源库数据
for i in src_tb_set:
    get_src_tb_column_detail = (
            base_sql
            + "'" + configs.mysql_source_db + "'" + "AND table_name=" + "'" + str(
        i) + "' ORDER BY ordinal_position ASC;"
    )

    source_cursor.execute(get_src_tb_column_detail)
    src_tb_column_result = source_cursor.fetchall()

    chk_sum = hashlib.md5(str(src_tb_column_result).replace(" ", "").encode()).hexdigest()
    source_chksum[i] = chk_sum

# 4 采集目标库信息
for i in dest_tb_set:
    get_dest_tb_column_detail = (
            base_sql
            + "'" + configs.mysql_dest_db + "'" + "AND table_name=" + "'" + str(i) + "' ORDER BY ordinal_position ASC;"
    )

    dest_cursor.execute(get_dest_tb_column_detail)
    dest_tb_column_result = dest_cursor.fetchall()

    chk_sum = hashlib.md5(str(dest_tb_column_result).replace(" ", "").encode()).hexdigest()
    dest_chksum[i] = chk_sum


# 5 进行集合运算
print("-------------- 结果统计 ------------------")
if source_chksum != dest_chksum:
    s1 = set()
    differ = set(source_chksum.items()) ^ set(dest_chksum.items())

    for i in differ:
        # 如果要排除掉src和dest 存在差集的表(背景:有时候源库src已经建好表,但是尚未发布到生产dest去,这种情况下就出现了二者表的数量不一样多),用下面这种写法
        # if i[0] not in list(dest_tb_set  - src_tb_set ) and i not in list(src_tb_set - dest_tb_set):
        #     s1.add(i[0])

        # 如果要全部都报出来,用下面这种写法
        s1.add(i[0])

    print('表结构(只比对了列和索引,未包括字符集和排序集)检查完成,存在差异的表如下 ---> ', s1)
else:
    print('表结构(只比对了列和索引,未包括字符集和排序集)检查完成,没有发现存在差异的表')

stop_time = time.time()
time_dur = stop_time - start_time
print(f"耗时 {time_dur} 秒")

readme 内容如下:

上面程序里面,我们没有比对表中的字符集和排序集。

如果要比较字符集使用下面的:
base_sql = '''
SELECT
 table_name,
  column_name,
 CASE WHEN `COLUMN_DEFAULT` IS NULL THEN '' ELSE `COLUMN_DEFAULT` END AS `COLUMN_DEFAULT` ,
  IS_NULLABLE,
  COLUMN_TYPE,
  CASE WHEN CHARACTER_SET_NAME IS NULL THEN '' ELSE CHARACTER_SET_NAME END AS CHARACTER_SET_NAME ,
  CASE WHEN COLLATION_NAME IS NULL THEN '' ELSE COLLATION_NAME END AS COLLATION_NAME ,
  COLUMN_KEY,
  EXTRA
FROM
  information_schema.columns where table_schema =
'''


如果不比较字符集,使用下面的:
base_sql = '''
SELECT
 table_name,
  column_name,
 CASE WHEN `COLUMN_DEFAULT` IS NULL THEN '' ELSE `COLUMN_DEFAULT` END AS `COLUMN_DEFAULT` ,
  IS_NULLABLE,
  COLUMN_TYPE,
  COLUMN_KEY,
  EXTRA
FROM
  information_schema.columns where table_schema =
'''

使用方法: 修改完成 configs.py后,执行  python3 main.py 即可