zdqueue – script to find deleted files still using space on ZFS

It is always a strange situation when ZFS shows you a full filesystem although you know that there should be enough free space. One reason could be a big file you did not think about. I wrote a small script to find the biggest file on a ZFS filesystem; you can find my zfsize script in my previous post.
Another problem could be a deleted file which is still held open by a process. The file is gone and you will not see it in the filesystem with ls/du/find and so on… You will only get the space back when the process stops using the file or you kill the process.
I wrote a small script to find such processes and old files which are still in the ZFS delete queue.

root@solaris:~/scripts# ./zdqueue.sh -h
ZFS Delete Queue Analyzing
Usage:
                ./zdqueue.sh -z <ZFS> [-o tempdir]
root@solaris:~/scripts#
root@solaris:~/scripts# ./zdqueue.sh -z oracle/u01
ZFS = oracle/u01
Mountpoint = /u01
TempDir = /tmp
This may take a while ...
I will wait at least 1 minute before analyzing
............
  PID TTY         TIME CMD
 2703 pts/10      0:43 pfiles
Still analyzing process list...
Do you want to wait another minute or work with the data we have? (y/n) n
OK, I will kill process 2703 and work with gathered information
---------------------------------------
Process: 709    /u01/app/12.1.0.2/grid/bin/oraagent.bin
The file was:   /u01/app/grid/diag/crs/orasid/crs/trace/crsd_oraagent_oracle.trc

Process: 595    /u01/app/12.1.0.2/grid/bin/orarootagent.bin
The file was:   /u01/app/grid/diag/crs/orasid/crs/trace/crsd_orarootagent_root.trc





#!/usr/bin/bash
#set -x 
###################################################
#
# zdqueue v0.1
#
# ZFS Delete Queue Analyzing
#
# small script to find open files on ZFS which 
# should be deleted but are still using space.
#
# 16.09.2016, written by Martin Presslaber
#
###################################################
help ()
{
	# Show the tool banner followed by the usage synopsis on stdout.
	# A single print call is enough: print expands the \n and \t escapes,
	# so the three output lines are the same as with one call per line.
	print "ZFS Delete Queue Analyzing\nUsage:\n\t\t$0 -z <ZFS> [-o tempdir]"
}
########## preTESTS #############
# Environment checks that must pass before anything else runs:
# Solaris only, global zone only, root only, and at least one argument.
OS=$(uname -s)
RELEASE=$(uname -r)
VERS=$(uname -v)
if [[ $OS != SunOS ]]
then
        # echo instead of print: print is normally not available to bash on
        # non-Solaris systems, which is exactly where this message is needed.
        echo "This script will only work on Solaris"
        exit 1
fi
# zonename is only looked up once we know we are on Solaris.
ZONE=$(zonename)
if [[ $ZONE != global ]]
then
        print "This script will only work in the global zone"
        exit 1
fi
# SOLARIS=new selects the "::if"-based mdb query further down.
# NOTE(review): the pattern only matches a two-part version such as "11.3";
# longer `uname -v` strings fall through to the other query - confirm intended.
[[ $VERS == 1[1-9].[1-9] ]] && SOLARIS=new
# The whoami location depends on the minor release number (text after the
# first dot of `uname -r`): above 10 use /usr/bin, otherwise /usr/ucb.
if [ "${RELEASE#*.}" -gt 10 ]
then
        ID=$(/usr/bin/whoami)
else
        ID=$(/usr/ucb/whoami)
fi
if [ "$ID" != "root" ]; then
        echo "$ID, you must be root to run this program."
        exit 1
fi
# No arguments at all: show the usage text and stop.
if [ $# -lt 1 ]
then
        help
        exit 1
fi
########## Options ###########
# -z <ZFS>     dataset to analyse (required); must be known to `zfs list`
# -o <tempdir> existing directory for the temporary files (default /tmp)
# -h           show usage
TEMPDIR="/tmp"
# Start empty so a value inherited from the environment cannot be mistaken
# for a dataset that was validated by the -z branch.
ZFS=""
ZFSmountpoint=""
while getopts "z:o:h" args
do
	case $args in
	z)
		ZFS=$OPTARG
		# Keep the `zfs list` row whose first column matches the dataset name.
		ZFSlist=$(zfs list "$ZFS" 2>/dev/null | nawk -v ZFS="$ZFS" '$1~ZFS {print $0}')
		if [[ -z $ZFSlist ]]
		then
			print "$ZFS does not seem to be a ZFS"
			exit 1
		fi
		# The last column of that row is taken as the mountpoint.
		ZFSmountpoint=$(zfs list "$ZFS" 2>/dev/null | nawk -v ZFS="$ZFS" '$1~ZFS {print $NF}')
	;;

	o)
		TEMPDIR=$OPTARG
		# An explicit if is required here: "test || print && exit 1" groups
		# as "(test || print) && exit 1" and therefore exits even when the
		# directory exists.
		if [[ ! -d $TEMPDIR ]]
		then
			print "$TEMPDIR does not exist!"
			exit 1
		fi
	;;

	h|*)
		help
		exit 1
	;;
	esac
done
shift $((OPTIND - 1))
# Without -z there is nothing to analyse; stop before zdb is run with an
# empty dataset argument.
if [[ -z $ZFS ]]
then
	help
	exit 1
fi
sleeping ()
{
	# Progress indicator: eleven 5-second pauses, each followed by a dot
	# without a newline, then one closing dot that ends the line.
	# SLEEP is a global counter and is left at 12 afterwards.
	SLEEP=1
	until (( SLEEP == 12 ))
	do
		sleep 5
		print ".\c"
		(( SLEEP += 1 ))
	done
	print "."
}
######### Let's go #########
print "ZFS = $ZFS"
print "Mountpoint = $ZFSmountpoint"
print "TempDir = $TEMPDIR"
print "This may take a while ... "
print "I will wait at least 1 minute before analyzing"
# NOTE(review): the temp files use fixed names inside $TEMPDIR and the script
# runs as root; consider mktemp if $TEMPDIR can be written by other users.
######## Create File with open delete queue
# The inner zdb call dumps object 1 of the dataset and takes the last field of
# its DELETE_QUEUE line; that value is the object the outer zdb call dumps.
# The substitution is left unquoted on purpose (it is a separate argument).
zdb -dddd "$ZFS" $(zdb -dddd "$ZFS" 1 | nawk '/DELETE_QUEUE/ {print $NF}') > "$TEMPDIR/zdqueue-open.tmp"
######## Find processes with files from delete queue
# Join the last field of every "... = ..." line of the dump into one
# "|"-separated alternation that is later used as an egrep pattern.
OPENFILES=$(nawk '/\= / {print $NF}' "$TEMPDIR/zdqueue-open.tmp" | while read DQi; do echo "$DQi|\c"; done | nawk '{print $4 $NF}')
# The loop above leaves a "|" after the last entry; drop it, because an empty
# trailing alternative in an extended regular expression is not well defined.
OPENFILES=${OPENFILES%|}

if [[ -z $OPENFILES ]]
then
	print "No files in delete queue for $ZFS"
	# Do not leave the dump behind on this early exit.
	rm -f "$TEMPDIR/zdqueue-open.tmp"
	exit 0
fi

# Run pfiles in the background on every PID that fuser reports for the
# mountpoint. The PID list must be word-split, so the substitution stays
# unquoted; the PID of the pfiles job is kept so it can be polled or killed.
pfiles $(fuser -c "$ZFSmountpoint" 2>/dev/null) 2>/dev/null > "$TEMPDIR/zdqueue-procs.tmp" &
PIDpfiles=$!
# Give the background pfiles job roughly a minute before checking on it.
sleeping
# ps -p prints the process entry (visible to the user) and succeeds only while
# the PID still exists. Default to "no" so a finished job skips the prompt.
WAIT=no
ps -p "$PIDpfiles" && WAIT=yes
while [[ $WAIT == yes ]]
do
	print "Still analyzing process list..."
	# On end-of-file (no terminal attached) behave like "n" rather than
	# prompting forever.
	read -r -p "Do you want to wait another minute or work with the data we have? (y/n) " A || A=n
	case $A in
	[yY][eE][sS]|[yY])
		sleeping
		# Re-check after the extra wait: leave the loop once pfiles is done
		# instead of asking again about a process that no longer exists.
		ps -p "$PIDpfiles" || WAIT=no
	;;
	[nN][oO]|[nN])
		print "OK, I will kill process $PIDpfiles and work with gathered information"
		kill "$PIDpfiles"
		WAIT=n
	;;
	# Any other answer falls through and the question is asked again.
	esac
done
# Report phase: for every matching entry in the pfiles capture, print the
# owning process and the path mdb still knows for the unlinked file.
print "---------------------------------------"
# Select the lines of the pfiles capture that match the OPENFILES alternation,
# turn ':' into blanks and print field 8 of those lines whose field 7 matches
# "ino" (presumably the inode number of pfiles' "ino:<n>" entry - TODO confirm).
# NOTE(review): $OPENFILES is an unanchored pattern applied to the whole line,
# so other numeric fields could match as well - verify.
egrep $OPENFILES $TEMPDIR/zdqueue-procs.tmp | tr ':' ' ' | awk '$7 ~ /ino/ {print $8}' |\
while read INO
do 
	print "Process: \c"
	# Stage 1 (awk): print the line preceding each "Current" line and field 5
	#   of every line containing "ino".
	# Stage 2 (tr + nawk): turn ':' into blanks; keep lines whose first field
	#   starts with a digit and lines whose second field matches $INO.
	# Stage 3 (nawk): for each line whose first field matches "ino", print the
	#   line that came directly before it in the filtered stream.
	# The result completes the "Process: " line started above (presumably the
	# "PID command" header of the owning process - TODO confirm).
	# NOTE(review): "$2 ~ INO" is a regex match, so a shorter inode number
	# also matches longer ones containing it - verify.
	awk '/Current/ {print PROC};{PROC=$0} /ino/ {print $5}' $TEMPDIR/zdqueue-procs.tmp |\
	tr ':' ' ' | nawk -v INO=$INO '$1 ~ /^[0-9]/ {print $0} $2 ~ INO {print $0}' |\
	nawk '$1 ~ /ino/ {print INO};{INO=$0}'
	# ZID: first field of the delete-queue dump line whose third field matches
	# the inode; it is used as z_id in the mdb queries below.
	ZID=`nawk -v INO=$INO '$3 ~ INO {print $1}' $TEMPDIR/zdqueue-open.tmp`
	if [[ $SOLARIS == new ]]
	then
		print "The file was:   \c"
		# Walk zfs_znode_cache, keep the znode with this z_id and
		# z_unlinked = 1, and print its z_vnode->v_path. From the mdb output
		# only the last field of lines containing '/' is kept, with double
		# quotes removed.
		echo "::walk zfs_znode_cache | ::if znode_t z_id = $ZID and z_unlinked = 1 | ::print znode_t z_vnode->v_path" |\
		mdb -k | awk '/\// {print $NF}' | sed 's/\"//g'
	else
		print "The file was:   \c"
		# Same lookup built from ::grep and ::map instead of ::if. The inner
		# double quotes close and reopen the string, so .==$ZID is pasted in
		# unquoted.
		echo "::walk zfs_znode_cache z | ::print znode_t z_id | ::grep ".==$ZID" | ::map <z | ::print znode_t z_vnode->v_path z_unlinked" |\
		mdb -k | awk '/\// {print $NF}' | sed 's/\"//g'
	fi
	# Separate the entries with blank lines.
	print "\n"
done

#### Clean up ####
# Remove both temporary files from the directory they were created in. No "/"
# is prepended, so a relative -o directory is cleaned up correctly as well.
rm -f "$TEMPDIR/zdqueue-procs.tmp"
rm -f "$TEMPDIR/zdqueue-open.tmp"
#################### EOF ####################

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.