Files
easyzsh/zshrc/prep_env.sh
2025-12-18 17:42:46 +08:00

316 lines
11 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
############################################################
# Aliases
############################################################
alias launch='mlx worker launch'
alias run='launch'
##################
# History
##################
# History sizes: HISTSIZE for the in-memory list, HISTFILESIZE for the file.
export HISTSIZE=100000 HISTFILESIZE=200000
# 2. bash only: append to the history file when the shell exits instead of
#    overwriting it.
if [[ -n "${BASH_VERSION}" ]]; then
  shopt -s histappend
fi
# 3. Ignore duplicate commands and commands that start with a space.
export HISTCONTROL=ignoreboth
# 4. (disabled) Ignore specific frequently used commands.
# export HISTIGNORE="&:[bf]g:exit:pwd:clear:history"
# 5. Record a timestamp with every history entry.
export HISTTIMEFORMAT="%F %T "
# 6. (disabled) Write history right after every command so that several
#    terminals share it in real time.
#    Note: this may cost a little performance, negligible on most modern
#    machines.
# export PROMPT_COMMAND="history -a; history -c; history -r; $PROMPT_COMMAND"
##################
# ulimit
##################
# Raise the open-file-descriptor limit for this shell.
ulimit -n 1024768
############################################################
# IGNOREEOF compatibility
############################################################
# bash: IGNOREEOF=2 makes the shell ignore consecutive Ctrl-D (EOF) presses
# instead of exiting on the first one.
export IGNOREEOF=2
# zsh: the matching option; a failure of setopt is deliberately ignored.
[[ -z "${ZSH_VERSION}" ]] || setopt IGNORE_EOF 2>/dev/null || true
############################################################
# MLX / Merlin environment (merged from mlxrc & start_devbox)
############################################################
# Kerberos credential cache location; an existing non-empty value is kept.
: "${KRB5CCNAME:=FILE:/opt/tiger/.krb5cc_${UID}}"
export KRB5CCNAME
# PYTHONPATH for MLX / notebooks:
#   /mlx/workspace:/tmp/mlx/workspace:/opt/tiger:/opt/tiger/lite_sdk
# goes in front of whatever PYTHONPATH already holds, and the
# mlx_notebook_pysdk path is always appended at the end.
# ${PYTHONPATH:+...} contributes "<old value>:" only when PYTHONPATH is
# non-empty, so no empty entry is produced.
export PYTHONPATH="/mlx/workspace:/tmp/mlx/workspace:/opt/tiger:/opt/tiger/lite_sdk:${PYTHONPATH:+${PYTHONPATH}:}/opt/tiger/mlx_notebook_pysdk/mlx-pysdk"
# pyspark interpreters; existing non-empty values are kept.
: "${PYSPARK_PYTHON:=/usr/bin/python3}"
: "${PYSPARK_DRIVER_PYTHON:=/usr/bin/python3}"
# Defaults to local[2], same as the original mlxrc.
: "${PYSPARK_SUBMIT_ARGS:=--master local[2] pyspark-shell}"
export PYSPARK_PYTHON PYSPARK_DRIVER_PYTHON PYSPARK_SUBMIT_ARGS
# MLXLAB devbox code-disk marker: keep whatever value the start script has
# already put in the environment; otherwise export it as an empty string.
# (Previously the value was cleared whenever it was set, and then cleared
# again unconditionally, which contradicted the documented intent.)
export MLXLAB_DEVBOX_CODE_DISK="${MLXLAB_DEVBOX_CODE_DISK:-}"
# MLX / Merlin variables (assumed to have been exported by start_devbox.sh
# when the container started). They are exported again here so that
# interactive shells inherit them as well.
# NOTE(review): MLX_USER_TOKEN is a credential stored in plain text in this
# file; consider loading it from the environment or a protected file instead.
export MLX_USER_TOKEN='bd2a3f1b71d54ab497f13f2eefc4f5bc'
export MLX_USER='qingyuhao'
export MLX_IMAGE_VID='d51qjdbc77u9kebl603g'
export MLX_IMAGE_URL='hub.byted.org/reckon/data.reckon.mlx.image_10786:4aa09c59c9cbb1fcde50526696177c84'
export MLX_ENGINE_SID='bgi0zldy6943b7db'
export MLX_DEVBOX_ID='132783'
export MLX_HOST_URL='https://ml.bytedance.net/'
export MERLIN_HOST_URL='https://ml.bytedance.net'
export TCC_CDN_HOST='//lf6-config.bytetcc.com/obj/tcc-config-web'
export ZTI_ENV='cn'
export IS_SEED='true'
# DOAS_ZONE (derived from BYTE_REGION): "cn" only when BYTE_REGION is exactly
# "CN"; every other value, including unset, maps to "row".
case "${BYTE_REGION}" in
  CN) export DOAS_ZONE="cn" ;;
  *) export DOAS_ZONE="row" ;;
esac
# SEC_KV_AUTH
export SEC_KV_AUTH=1
# Complete NO_PROXY / no_proxy uniformly.
# Each base domain below is checked on its own: an entry is prepended only
# when it is not already an exact member of the comma-separated list, so a
# list that holds just one of the domains (or only a more specific host such
# as foo.byted.org) still receives the missing entries. Existing entries keep
# their order; the variable is (re-)exported in every case.
for _v in NO_PROXY no_proxy; do
  # Read the current value of the variable whose name is in $_v
  # (indirect expansion has a different syntax in bash and in zsh).
  if [[ -n "${BASH_VERSION}" ]]; then
    _cur="${!_v}"
  else
    _cur="${(P)_v}"
  fi
  # Collect the base domains that are not yet list members.
  _missing=""
  for _d in ".byted.org" "byted.org" ".bytedance.net" "bytedance.net"; do
    case ",${_cur}," in
      *",${_d},"*) ;;
      *) _missing="${_missing:+${_missing},}${_d}" ;;
    esac
  done
  if [[ -z "${_cur}" ]]; then
    export "${_v}=${_missing}"
  elif [[ -n "${_missing}" ]]; then
    export "${_v}=${_missing},${_cur}"
  else
    export "${_v}=${_cur}"
  fi
done
# Do not leave the temporaries behind in the interactive shell.
unset _v _cur _d _missing
# IS_SEED: disable the HTTP(S) proxies (same logic that used to live in mlxrc).
# Only the exact value "true" triggers the unset.
case "${IS_SEED}" in
  true) unset http_proxy HTTP_PROXY https_proxy HTTPS_PROXY ;;
esac
############################################################
# /tmp/krb5cc_0 symlink (moved here from mlxrc)
############################################################
# Create the link only when nothing exists at /tmp/krb5cc_0 yet; the -L test
# also catches a dangling symlink. A failing ln is deliberately ignored.
[[ -e /tmp/krb5cc_0 || -L /tmp/krb5cc_0 ]] \
  || ln -s /opt/tiger/.krb5cc_0 /tmp/krb5cc_0 2>/dev/null \
  || true
############################################################
# HDFS (logic carried over from the original prep_env.sh)
############################################################
if grep -q "source import_hdfs_envs.sh" ~/.bashrc 2>/dev/null; then
  # ~/.bashrc already sources import_hdfs_envs.sh; nothing to do here.
  :
elif command -v import_hdfs_envs.sh > /dev/null 2>&1 \
  && [[ $(which import_hdfs_envs.sh) != *"pyenv/"* ]]; then
  # The script is on PATH and does not resolve to a pyenv location.
  source import_hdfs_envs.sh
else
  # Reached when the script is missing from PATH or resolves under pyenv/.
  echo "pyenv env, skip hdfs init, please refer to: https://bytedance.larkoffice.com/docx/T9IrdzKRSomwOOxqbUOcRZyLnqh to do hdfs init manually"
fi
############################################################
# Proxy settings
############################################################
# Remove HTTPS_PROXY from this shell unconditionally (independent of IS_SEED).
unset HTTPS_PROXY
############################################################
# One-time loading of workspace-level env
############################################################
# Runs once per environment: WORKSPACE_ENVS_SET=1 is exported at the end, so
# shells started from this one skip the whole section.
if [[ "$WORKSPACE_ENVS_SET" != "1" ]]; then
  envfile="/etc/.container_env"
  if [[ -f $envfile ]]; then
    # Export NAME=VALUE lines for variables that are not set yet.
    # Lines without '=' and lines whose NAME is not a valid identifier
    # (blank lines, comments, malformed entries) are skipped: the name is
    # interpolated into an eval below, so it must be validated first.
    # "|| [[ -n $line ]]" also handles a last line without trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" == *"="* ]] || continue
      _varname="${line%%=*}"
      [[ "$_varname" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue
      if eval "[[ -z \"\${${_varname}+x}\" ]]"; then # env is not set
        export "${_varname}=${line#*=}"
      fi
    done <"$envfile"
    unset _varname
  fi
  # Locate the directory of this script (zsh: $0, bash: BASH_SOURCE) and
  # source the shell-specific hadoop setup unless the build env is "dev".
  if [[ -n "${ZSH_VERSION}" ]]; then
    cur_dir="$(cd "$(dirname "$0")" && pwd)"
    if [[ "$ARNOLD_WORKSPACE_BUILD_ENV" != "dev" ]]; then
      source "$cur_dir"/zsh_hadoop.sh
    fi
  else
    cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    if [[ "$ARNOLD_WORKSPACE_BUILD_ENV" != "dev" ]]; then
      source "$cur_dir"/bash_hadoop.sh
    fi
  fi
  source "$cur_dir"/common_env.sh
  # PATH=/workspace:$PATH has always been set here; the "Double check PATH"
  # section further down additionally guarantees /workspace is in PATH.
  export PATH=/workspace:$PATH
  # WORKSPACE_PASSWORD (the original note says it comes from code-server):
  # when not already set, take the text that follows "vscode-token " on the
  # first matching line of the process list.
  if [[ -z "${WORKSPACE_PASSWORD}" ]]; then
    export WORKSPACE_PASSWORD="$(ps -ef | grep -oP 'vscode-token\ \K.*' | head -n 1)"
  fi
  export WORKSPACE_ENVS_SET=1
fi
############################################################
# Double check PATH
# Make sure every directory below is in PATH (the list merges the ones from
# the original prep_env.sh and from mlxrc).
############################################################
# Each missing directory is prepended in turn, so directories later in the
# list end up earlier in PATH.
_path_dirs=(
  "$HOME/.local/bin"
  "$HOME/bin"
  "/workspace"
  "/usr/local/bvc/bin"
  "/opt/tiger/arnold/bin"
  "/opt/tiger/consul_deploy/bin/go"
  "/opt/tiger/spark_deploy/spark-3.2/spark-stable/bin"
  "/opt/mlx_deploy/miniconda3/envs/mlx/bin"
  "/opt/tiger/mlx_deploy"
  "/opt/tiger/ss_bin"
  "/opt/tiger/ss_lib/bin"
  "/opt/common_tools"
  "/opt/tiger/yarn_deploy/hadoop/bin"
  "/opt/tiger/yarn_deploy/hive/bin"
)
for i in "${_path_dirs[@]}"; do
  # Wrapping both sides in ':' turns the substring match into an exact
  # match on one PATH entry.
  case ":$PATH:" in
    *":$i:"*) ;;
    *) PATH="$i:$PATH" ;;
  esac
done
unset _path_dirs
# DEVBOX_PATH is handled separately (it may be a long path).
if [[ -n "${DEVBOX_PATH}" ]]; then
  case ":$PATH:" in
    *":$DEVBOX_PATH:"*) ;;
    *) PATH="${DEVBOX_PATH}:$PATH" ;;
  esac
fi
export PATH
############################################################
# Worker-level env (from the original prep_env.sh)
############################################################
# _export_env_defaults FILE
# Reads FILE line by line and exports NAME=VALUE for every variable that is
# not set yet; variables that already exist (even if empty) are left alone.
# VALUE is everything after the first '='.
# Skipped lines: lines without '=', and lines whose NAME is not a valid
# identifier (blank lines, comments, malformed entries). The name is
# interpolated into an eval, so it must be validated first; unvalidated
# input previously produced "bad substitution" noise and allowed a crafted
# line to run arbitrary commands.
# Returns 1 when FILE is not a regular file, 0 otherwise.
_export_env_defaults() {
  local _file="$1" _line _name
  [[ -f "$_file" ]] || return 1
  # "|| [[ -n $_line ]]" keeps a last line that lacks a trailing newline.
  while IFS= read -r _line || [[ -n "$_line" ]]; do
    [[ "$_line" == *"="* ]] || continue
    _name="${_line%%=*}"
    [[ "$_name" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue
    # ${NAME+x} is empty only when NAME is unset.
    if eval "[[ -z \"\${${_name}+x}\" ]]"; then
      export "${_name}=${_line#*=}"
    fi
  done <"$_file"
  return 0
}
worker_envfile="/etc/worker_envs_$ARNOLD_WORKER_ID"
user_worker_envfile="$HOME/.worker_envs/worker_envs_$ARNOLD_WORKER_ID"
# The per-user file takes precedence; the system-wide file is only read when
# the per-user file does not exist.
if [[ -f "$user_worker_envfile" ]]; then
  _export_env_defaults "$user_worker_envfile"
elif [[ -f "$worker_envfile" ]]; then
  _export_env_defaults "$worker_envfile"
fi
############################################################
# PS1 (from the original prep_env.sh)
############################################################
# Only set when not running under zsh. The prompt strings are single-quoted,
# so $ARNOLD_WORKER_ID / $ARNOLD_WORKSPACE_ID are stored literally in PS1.
if [[ -z "${ZSH_VERSION}" ]]; then
  # Lowest priority: plain user@host:dir prompt.
  PS1='\u@\h:\W\$ '
  # Workspace ("master") prompt overrides the plain one.
  if [[ -n "${ARNOLD_WORKSPACE_ID}" ]]; then
    PS1='\[\033[01;34m\]\u@$ARNOLD_WORKSPACE_ID.master:\W\[\033[00m\]\$ \[\]'
  fi
  # Worker prompt has the highest priority.
  if [[ -n "${ARNOLD_WORKER_ID}" ]]; then
    PS1='\[\033[01;32m\]\u@$ARNOLD_WORKER_ID.worker:\W\[\033[00m\]\$ \[\]'
  fi
  export PS1
fi
############################################################
# Inherit key environment variables (PYTHONPATH, PATH, LD_LIBRARY_PATH, ...)
# from the image
############################################################
envfile="/etc/.env"
env_white_list="PYTHONPATH PATH LD_LIBRARY_PATH" # variables that are merged and de-duplicated
# deduplicate_env VALUE
# Prints the colon-separated list VALUE with repeated entries removed. The
# first occurrence of every entry is kept and the original order is preserved.
# Prints nothing for an empty VALUE. No trailing newline is written.
#
# The value is handed to awk through the environment and split there, rather
# than piped in with echo: echo's trailing newline used to become part of the
# last record, so the last entry never matched an earlier duplicate
# ("a:b:a" stayed "a:b:a").
deduplicate_env() {
  local var_value="$1"
  _DEDUP_ENV_INPUT="$var_value" awk 'BEGIN {
    n = split(ENVIRON["_DEDUP_ENV_INPUT"], entries, ":")
    for (k = 1; k <= n; k++) {
      if (entries[k] in seen) {
        continue
      }
      seen[entries[k]] = 1
      # emitted counts what has been printed, so the separator goes in
      # front of every entry except the first one.
      printf("%s%s", (emitted++ ? ":" : ""), entries[k])
    }
  }'
}
if [[ -f "$envfile" ]]; then
  # "|| [[ -n $line ]]" keeps a last line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip comment lines.
    if [[ "$line" =~ ^[[:space:]]*# ]]; then
      continue
    fi
    # Skip blank lines.
    if [[ "$line" =~ ^[[:space:]]*$ ]]; then
      continue
    fi
    # Split into name (before the first '=') and value (after it).
    var_name="${line%%=*}"
    var_value="${line#*=}"
    # Only valid identifiers are accepted.
    [[ "$var_name" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue
    # Only variables named in the white list are merged.
    case " $env_white_list " in
      *" $var_name "*) ;;
      *) continue ;;
    esac
    # Current value of the variable named by $var_name (indirect expansion
    # has a different syntax in bash and in zsh).
    if [[ -n "${BASH_VERSION}" ]]; then
      existing_value="${!var_name}"
    else
      existing_value="${(P)var_name}"
    fi
    # Nothing to merge with: take the value from the file as is.
    if [[ -z "$existing_value" ]]; then
      export "$var_name=$var_value"
      continue
    fi
    # Otherwise put the file's value in front of the current one and
    # de-duplicate the combined list.
    new_value="$var_value:$existing_value"
    dedup_value=$(deduplicate_env "$new_value")
    export "$var_name=$dedup_value"
  done <"$envfile"
fi
############################################################
# GPU (from the original prep_env.sh)
############################################################
# nvidia.sh is sourced only when NVIDIA_VISIBLE_DEVICES is non-empty and
# not the literal value "none".
case "${NVIDIA_VISIBLE_DEVICES}" in
  '' | none) ;;
  *)
    # Directory of this script: bash exposes it through BASH_SOURCE,
    # zsh through $0.
    if [[ -z "${ZSH_VERSION}" ]]; then
      cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    else
      cur_dir="$(cd "$(dirname "$0")" && pwd)"
    fi
    source "$cur_dir"/nvidia.sh
    ;;
esac