Sh/Bash Tips

Measuring a script's execution time with the time command

time bash /path/to/your/script.sh

Reference: How to get execution time of a script effectively?
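
When only part of a script needs timing, Bash's built-in SECONDS counter is a handy alternative to wrapping the whole run in time. A minimal sketch (the sleep stands in for the real work):

#!/usr/bin/env bash

# SECONDS is a Bash built-in counting seconds since shell startup;
# resetting it to 0 times just the block that follows.
SECONDS=0

sleep 2    # stand-in for the actual work

echo "Elapsed: ${SECONDS}s"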

Logging command output to a file under crontab

*/2 * * * * /usr/bin/bash /path/to/some/bash/script.sh >> /path/to/log/file 2>&1

Reference: How to redirect output to a file from within cron?
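
To rotate the log by day instead of appending to a single file forever, the filename can embed the date. Note that % is special in crontab and must be escaped as \% (the paths below are placeholders):

*/2 * * * * /usr/bin/bash /path/to/some/bash/script.sh >> /path/to/log/file-$(date +\%Y\%m\%d).log 2>&1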

A script using curl to periodically download data from an FTP server

#!/usr/bin/env bash

# v0.0.6
#
# Rewrote the way of deciding whether a URI is a remote directory or a file;
# list_r_dir is now reliable.


# v0.0.5
#
# Fixed a bug when the remote has multiple directory levels.
# Refactored the downloading into download and batch_download functions.
# Changed find -maxdepth to 5, in case the remote hierarchy is very deep.

# v0.0.4
#
# Optimized the downloading process. Now it takes less time and performs
# fewer network operations.

# v0.0.3
#
# Refactored; removed some ugly if...then...fi blocks.

FTP_SERVER="ftp://10.11.1.9"
TARGET_DIR="/home/fpga_bj.xfoss/bin_files"
CRED="ftp_username:your_secret"
END_SLASH="/"

declare -A r_files
declare -A l_files

if [ -n "$TARGET_DIR" ] && [ ! -d "${TARGET_DIR}" ]; then mkdir -p "${TARGET_DIR}"; fi

function init_r_files() {
	if [ ${#r_files[@]} -eq 0 ]; then return 0; fi
	for i in "${!r_files[@]}"; do unset r_files["$i"]; done
}

function show_r_files() {
	if [ ${#r_files[@]} -eq 0 ]; then return 0; fi
	for i in "${!r_files[@]}"; do echo "$i: ${r_files[$i]}"; done
}

function list_r_dir() {
	uri="$FTP_SERVER/$1"
	l=$(curl -s -u "$CRED" "$uri" -l)

	if [ -z "$l" ]; then return 0; fi
	for f in $l; do
		if [ "$1" = "" ]; then
			sub_loc="$f/"
			f_uri="$f"
		else
			sub_loc="$1$f/"
			f_uri=$(echo "$1$f" | tr -s '/')
		fi

		sub_uri="$FTP_SERVER/$sub_loc"
		sub_uri=${sub_uri%"$END_SLASH"}

		# Fetch target size, for deciding whether it's a dir or file.
		content_length=$(curl --silent -I -u "$CRED" "$sub_uri" | grep -i 'Content-Length' | awk '{print $2}')
		
		# Case: target is a dir.
		if [ -z "$content_length" ]; then
			list_r_dir "$sub_loc"
			continue;
		fi

		# Fetch the remote file's md5. Note: this streams the whole file
		# through md5sum, so every run transfers each remote file once.
		r_md5=$(curl -sL -u "${CRED}" "${FTP_SERVER}/${f_uri}" | md5sum | cut -d ' ' -f 1)
		r_files["$f_uri"]="$r_md5"
	done
}

function init_l_files() {
	if [ ${#l_files[@]} -eq 0 ]; then return 0; fi
	for key in "${!l_files[@]}"; do unset l_files["$key"]; done
}

function show_l_files() {
	if [ ${#l_files[@]} -eq 0 ]; then return 0; fi
	for key in "${!l_files[@]}"; do echo "$key: ${l_files[$key]}"; done
}

function list_l_files() {
	# Find all files except hidden directories
	#
	# Ref: https://stackoverflow.com/a/18360093
	files=$(find "$TARGET_DIR" -maxdepth 5 -type f -not -path '*/.*')
	prefix="$(realpath "$TARGET_DIR")$END_SLASH"

	# Note: relies on word splitting, so file names must not contain spaces.
	for file in $files; do
		# Remove prefix from a string
		# Ref: https://stackoverflow.com/a/16623897
		rel_path=${file#"$prefix"}
		l_md5=$(md5sum "$file" | awk '{ print $1 }')
		l_files["$rel_path"]="$l_md5"
	done
}

function download() {
	full_path="$TARGET_DIR/$1"
	uri="$FTP_SERVER/$1"
	dir=$(dirname "${full_path}")

	if [ ! -d "$dir" ]; then mkdir -p "$dir"; fi
	echo "$(date '+%Y-%m-%d %H:%M:%S') - downloading $path"
	/usr/bin/curl -s -u "$CRED" "$uri" -R -o "$full_path"
}

function batch_download() {
	if [ ${#r_files[@]} -eq 0 ]; then return 0; fi
	
	# Case: initial download, TARGET_DIR empty
	if [ ${#l_files[@]} -eq 0 ]; then
		for path in "${!r_files[@]}"; do download "$path"; done
		return 0;
	fi

	for path in "${!r_files[@]}"; do
		r_md5="${r_files[$path]}"

		for l_path in "${!l_files[@]}"; do
			l_md5="${l_files[$l_path]}"

			# Case: file already exists and not change.
			if [ "$path" = "$l_path" ] && [ "$r_md5" = "$l_md5" ]; then continue 2; fi
			# Case: file exists but changed.
			if [ "$path" = "$l_path" ]; then break; fi
		done

		# Cases: file exists but changed, and file not exists.
		download "$path"
	done
}

function clean_up() {
	if [ ${#l_files[@]} -eq 0 ] || [ ${#r_files[@]} -eq 0 ]; then return 0; fi

	for l_path in "${!l_files[@]}"; do
		f=$(realpath "${TARGET_DIR}/$l_path")

		for r_path in "${!r_files[@]}"; do
			if [ "$l_path" = "$r_path" ]; then continue 2; fi
		done

		echo "$(date '+%Y-%m-%d %H:%M:%S') - deleting $f"
		rm -f "$f"
	done

	# Find empty directories and delete them
	#
	# Ref: https://stackoverflow.com/a/2811135
	find "$TARGET_DIR" -depth -type d -empty -delete
	#
	# Except hidden dirs/files
	# Ref: https://askubuntu.com/a/318211
	# find "$1" -depth -type d -empty -not -path '*/.*' -delete
}

init_l_files
init_r_files

# List remote files (an empty argument means the FTP root)
list_r_dir ""
# List local files
list_l_files

# echo -e "-- Remote files --\n"
# show_r_files

# echo -e "-- Local files --\n"
# show_l_files


# Download only the files that actually need downloading
batch_download

# Remove any local files and dirs that no longer exist on the remote
clean_up
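
To actually run the sync on a schedule, a crontab entry along these lines would do; wrapping the script in flock prevents two runs from overlapping when one sync takes longer than the interval (the paths and lock file name are assumptions):

*/30 * * * * /usr/bin/flock -n /tmp/ftp_sync.lock /usr/bin/bash /path/to/ftp_sync.sh >> /var/log/ftp_sync.log 2>&1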

A script to log ping latency to a specified IP address

This script pings a specified IP address every 10 minutes, extracts the avg value from the result, and records it in a text file.

#!/usr/bin/env bash

# Parameter list:
#   $1 - target ip address
#   $2 - target name

TARGET_IP=$1
TARGET_NAME=$2

record="delay_${TARGET_IP}_${TARGET_NAME}-$(date '+%Y%m%d').txt"

if [ -f "$record" ]; then rm -f "$record"; fi

touch "$record"
echo -e "\nDelay to ${TARGET_IP}(${TARGET_NAME}) based upon \`ping\` command\n\n\t---------------------------" > "$record"

delay_sampling () {
    delay_average=$(ping -c 4 "$1" | tail -1 | awk '{print $4}' | cut -d '/' -f 2)
    ping_at=$(date '+%Y-%m-%d %H:%M:%S')

    echo -e "Delay at $ping_at\t- $delay_average ms" >> "$2"
}

i=0
# 144 samples at a 10-minute interval cover roughly 24 hours.
while [ "$i" -lt 144 ]; do
    delay_sampling "$TARGET_IP" "$record"
    i=$((i+1))
    sleep 600
done

exit 0
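
Assuming the script is saved as ping_delay.sh (a hypothetical name; the IP and label below are placeholders too), it takes the target IP and a label as positional parameters. Running it under nohup keeps the 24-hour sampling loop alive after logging out:

$ chmod +x ping_delay.sh
$ nohup ./ping_delay.sh 10.11.1.9 fpga-server &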

Reference: extract average time from ping -c

Finding and deleting all files of a given type in the current directory and all its subdirectories

$ find . -name '*.pyc'
$ find . -name '*.pyc' -delete
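
If the local find lacks -delete, piping null-separated results to xargs achieves the same effect and survives file names containing spaces:

$ find . -name '*.pyc' -print0 | xargs -0 rm -f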

Batch-deleting directories (while keeping the ones you want)

ls | grep -w -v -e "lenny" -e "sw" | xargs rm -rf

Note: directories under the current folder matching lenny or sw will be kept.
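
Parsing ls output breaks on names containing whitespace; a find-based equivalent that keeps the same two directories is safer:

$ find . -mindepth 1 -maxdepth 1 -type d -not -name 'lenny' -not -name 'sw' -exec rm -rf {} +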

Replacing a string in all files whose names contain log in a folder

ls | grep log | xargs sed -i 's/text-wrap: wrap;/text-wrap: wrap; white-space: pre-wrap;/g'

Note: this matches files under the current folder whose names contain log, and replaces the string text-wrap: wrap; in those files with text-wrap: wrap; white-space: pre-wrap;, so that HTML pre elements wrap lines automatically in both Chrome (WebKit) and Firefox (Gecko).
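
Since sed -i rewrites files in place, keeping backups is cheap insurance; GNU sed accepts a backup suffix appended directly to -i (the .bak suffix is just a convention):

$ ls | grep log | xargs sed -i.bak 's/text-wrap: wrap;/text-wrap: wrap; white-space: pre-wrap;/g'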

Creating and updating symbolic links

# Create a symbolic link
$ ln -s File link
# Update a symbolic link (see the note on the flags below)
$ ln -vfns File1 link
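
The flags on the update command matter for directory symlinks: -f forces the overwrite, -n treats an existing link to a directory as the link itself instead of descending into it, and -v reports what was done. Without -n, the new link would end up inside the target directory. A quick demonstration (dir1 and dir2 are throwaway names):

$ mkdir dir1 dir2
$ ln -s dir1 link
$ ln -sfn dir2 link    # link now points to dir2, not dir1/dir2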