#!/bin/bash
# author: Bryan zhao
# date: 2025-07-29
#
# sweep.sh - list GBase 8a tables whose data files look fragmented
#            ("hole" tables), using filefrag instead of metadata tables.
#
# Problem addressed (original author's notes):
#   1. The traditional way of finding tables with holes in a GBase 8a
#      database scans the metadata tables, which is slow.
#   2. filefrag only reads operating-system metadata, which is faster.
#
# Usage:
#   1. Deploy this script on a composite node under /home/gbase/sweep.
#   2. Adjust the parameters in the "Configuration" section below.
#   3. Run it from that directory:  sh sweep.sh
#
# Result:
#   A log/ directory is (re)created under the current working directory
#   (e.g. /home/gbase/sweep/log). One empty file is created in it per table
#   found, named  <dbname>.<tbname>.
#
# Required environment: GCLUSTER_HOME, GBASE_BASE.
# Optional environment: SWEEP_USER, SWEEP_PASSWD (override the defaults below).

# The documented invocation is "sh sweep.sh", but the script relies on Bash
# features (local, [[ ]], read -u). Re-execute under bash if sh is not bash.
[ -n "${BASH_VERSION:-}" ] || exec bash "$0" "$@"

set -u

# ----------------------------- Configuration -----------------------------
user="${SWEEP_USER:-gbase}"
# NOTE(review): the password is passed to the gbase client on its command
# line, so it is visible in the process list while the query runs.
passwd="${SWEEP_PASSWD:-******}"
threads_num=2 # number of tables checked concurrently
suffix=n1     # shard suffix to inspect; checking one shard is usually enough
avg_sum=2     # a table averaging more than this many extents per file is reported
# -------------------------------------------------------------------------

# Set by main(); read by child().
log_dir=

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Check one table shard and record the table if it looks fragmented.
# Globals:   GBASE_BASE (read), log_dir (read)
# Arguments: $1 - database name
#            $2 - table name
#            $3 - shard suffix (e.g. n1)
#            $4 - threshold for the average extent count per file
# Outputs:   creates ${log_dir}/<dbname>.<tbname> when the average number of
#            extents per file is greater than the threshold; an error message
#            on stderr when the shard directory does not exist
# Returns:   1 if the shard directory is missing, 0 otherwise
#######################################
child() {
  local dbname=$1 tbname=$2 suffix=$3 avg_sum=$4
  local mulu="${GBASE_BASE}/userdata/gbase/${dbname}/sys_tablespace/${tbname}_${suffix}"

  if [[ ! -d "$mulu" ]]; then
    printf 'Error: Directory %s not found\n' "$mulu" >&2
    return 1
  fi

  # filefrag prints one summary line per file: "<path>: N extent(s) found".
  # The count is taken as the field just before "extent(s) found" rather than
  # as $2, so paths containing spaces are still parsed correctly. awk exits 0
  # only when at least one file was seen and the average exceeds the limit.
  if find "$mulu" -type f -exec filefrag {} + 2>/dev/null \
    | awk -v limit="$avg_sum" '
        {
          for (i = 2; i < NF; i++) {
            if ($i ~ /^extents?$/ && $(i + 1) ~ /^found/) {
              sum += $(i - 1)
              files++
              break
            }
          }
        }
        END { exit !(files > 0 && sum / files > limit + 0) }
      '; then
    touch -- "${log_dir}/${dbname}.${tbname}"
  fi
  return 0
}

#######################################
# Query the table list and run child() on every table, at most
# threads_num at a time.
# Globals:   GCLUSTER_HOME, GBASE_BASE, user, passwd, threads_num, suffix,
#            avg_sum (read); log_dir (written)
# Outputs:   tb.list and log/ in the current working directory
#######################################
main() {
  : "${GCLUSTER_HOME:?GCLUSTER_HOME must be set}"
  : "${GBASE_BASE:?GBASE_BASE must be set}"
  # Zero tokens would make the first "read -u 6" below block forever.
  [[ "$threads_num" =~ ^[1-9][0-9]*$ ]] \
    || die "threads_num must be a positive integer, got: ${threads_num}"

  local work_dir
  work_dir=$(pwd) || die "cannot determine the working directory"
  local tb_list="${work_dir}/tb.list"
  log_dir="${work_dir}/log"

  # Start every run with an empty log directory.
  rm -rf -- "${log_dir:?}"
  mkdir -p -- "$log_dir" || die "cannot create ${log_dir}"

  local sql="SELECT dbname,tbname FROM gbase.table_distribution where dbname not in ('information_schema','performance_schema','gbase','gctmpdb','gclusterdb')"
  "${GCLUSTER_HOME}/bin/gbase" "-u${user}" "-p${passwd}" -N -e"${sql}" >"$tb_list" \
    || die "failed to query gbase.table_distribution"

  # Concurrency limiter: a FIFO opened read-write on fd 6 holds one token
  # (an empty line) per allowed job. The FIFO lives in a private mktemp
  # directory and is unlinked once the descriptor is open.
  local fifo_dir
  fifo_dir=$(mktemp -d) || die "mktemp failed"
  local fifo="${fifo_dir}/tokens.fifo"
  mkfifo -- "$fifo" || { rm -rf -- "$fifo_dir"; die "mkfifo failed"; }
  exec 6<>"$fifo"
  rm -rf -- "$fifo_dir"

  local i
  for ((i = 0; i < threads_num; i++)); do
    echo
  done >&6

  local db table _token
  while IFS=$'\t ' read -r db table; do
    [[ -n "$db" && -n "$table" ]] || continue
    # Take a token; blocks while threads_num jobs are already running.
    read -r -u 6 _token || die "token pipe closed unexpectedly"
    {
      child "$db" "$table" "$suffix" "$avg_sum"
      echo >&6 # hand the token back
    } &
  done <"$tb_list"

  wait
  exec 6>&-
}

main "$@"