Compare commits

...

2 Commits

Author SHA1 Message Date
Yuhao Qing
76549f5438 merlin zsh compatible 2025-12-18 17:42:46 +08:00
Yuhao Qing
a7dcd7c271 Stage prep_env.sh 2025-12-18 17:42:20 +08:00
2 changed files with 325 additions and 29 deletions

View File

@@ -1,40 +1,21 @@
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers
# were stripped, so removed and added lines appear side by side (see the
# paired `if` lines and the two ways prep_env.sh is sourced below). Confirm
# against the real file before relying on it as a runnable script.
# Merlin devbox specific aliases
alias l=ls
alias ll='ls -l --color=auto'
alias infosys='cd ~/repos/infosys; pushonline -minfosys'
export PATH=/opt/tiger/tce/tce_tools/bin:/home/tiger/.local/bin:/opt/common_tools:/usr/local/go/bin:$PATH
# Patch prep_env.sh for zsh compatibility before sourcing
# The sed program applies two rewrites to the script text:
#   1. a line that is exactly `shopt -s histappend` becomes guarded by a
#      BASH_VERSION check;
#   2. every `${!name}` becomes `${(P)name}`.
# The patched text is then sourced through process substitution.
source <(sed -e 's/^shopt -s histappend$/[[ -n "${BASH_VERSION}" ]] \&\& shopt -s histappend/' \
-e 's/\${!\([^}]*\)}/${(P)\1}/g' \
/workspace/mlx/../vscode/prep_env.sh)
# Interactive convenience aliases (cp/mv prompt before overwriting).
# `ll` and `l` are defined a second time here with different values than above.
alias cp='cp -i'
alias mv='mv -i'
alias m='more'
alias ll='ls -l'
alias lsl='ls -lrt'
alias lm='ls -al|more'
alias l='ls -lrt'
alias c='cat'
alias v='vi'
alias cl='clear'
alias pg='ps -ef| grep '
# Set terminal type to xterm for better compatibility with terminal features
# export TERM=xterm
# Customize command prompt to show: hostname(TCE_info):current_directory$
# \h = hostname, \W = current working directory basename, \$ = $ for user/# for root
# export PS1='\h($TCE_PSM@$TCE_CLUSTER:$TCE_ENV):\W\$ '
# Sources prep_env.sh directly, without the sed patch applied above.
# NOTE(review): presumably the pre-change line of the diff — confirm.
source /workspace/mlx/../vscode/prep_env.sh
# sh -c /opt/tiger/mlx_deploy/greeting.sh
source /opt/tiger/mlx_deploy/mlxrc
# The next two `if` lines test the same file (unquoted vs quoted path) and
# are followed by a single `fi`.
if [ -f /opt/tiger/mlx_deploy/pythonpath_rc ]; then
if [ -f "/opt/tiger/mlx_deploy/pythonpath_rc" ]; then
source /opt/tiger/mlx_deploy/pythonpath_rc
fi
# NOTE(review): `[ -z ]` has no operand, so the test is always true; a
# variable reference appears to be missing here — confirm.
if command -v import_hdfs_envs.sh > /dev/null 2>&1 && [ -z ]; then source import_hdfs_envs.sh; fi
# Same pattern as above: two `if` lines for one file, one `fi`.
if [ -f /opt/tiger/rh2_bashrc ]; then
if [ -f "/opt/tiger/rh2_bashrc" ]; then
source /opt/tiger/rh2_bashrc
fi
source /opt/tiger/mlx_deploy/userrc
. "$HOME/.cargo/env"
# huggingface proxy
export HF_NETWORK_PROXY=http://huggingface-proxy-sg.byted.org

315
zshrc/prep_env.sh Normal file
View File

@@ -0,0 +1,315 @@
#!/bin/bash
############################################################
# Aliases
############################################################
alias launch='mlx worker launch' run='launch'

##################
# History
##################
export HISTSIZE=100000 HISTFILESIZE=200000
# Append to the history file when the shell exits instead of overwriting it
# (shopt only exists in bash).
if [[ -n "${BASH_VERSION}" ]]; then
  shopt -s histappend
fi
# Skip duplicate entries and commands that start with a space.
export HISTCONTROL=ignoreboth
# Optionally ignore a few very common commands.
# export HISTIGNORE="&:[bf]g:exit:pwd:clear:history"
# Record a timestamp with every history entry.
export HISTTIMEFORMAT="%F %T "
# Optionally flush history after every command so several terminals share it
# live (may cost a little performance).
# export PROMPT_COMMAND="history -a; history -c; history -r; $PROMPT_COMMAND"

##################
# File-descriptor limit
##################
ulimit -n 1024768

############################################################
# IGNOREEOF compatibility
############################################################
# bash reads IGNOREEOF.
export IGNOREEOF=2
# zsh uses an option instead; a failure to set it is deliberately ignored.
[[ -z "${ZSH_VERSION}" ]] || setopt IGNORE_EOF 2>/dev/null || true
############################################################
# MLX / Merlin environment (merged from mlxrc & start_devbox)
############################################################
# Kerberos credential cache location; an existing value wins.
: "${KRB5CCNAME:=FILE:/opt/tiger/.krb5cc_${UID}}"
export KRB5CCNAME

# PYTHONPATH for MLX / notebooks: the workspace and SDK directories go in
# front of any existing value, and the mlx_notebook_pysdk path always goes
# last. "${PYTHONPATH:+...}" contributes nothing when PYTHONPATH is empty.
export PYTHONPATH="/mlx/workspace:/tmp/mlx/workspace:/opt/tiger:/opt/tiger/lite_sdk:${PYTHONPATH:+${PYTHONPATH}:}/opt/tiger/mlx_notebook_pysdk/mlx-pysdk"

# pyspark interpreters; existing values win.
: "${PYSPARK_PYTHON:=/usr/bin/python3}"
: "${PYSPARK_DRIVER_PYTHON:=/usr/bin/python3}"
# Default to local[2], matching the original mlxrc.
: "${PYSPARK_SUBMIT_ARGS:=--master local[2] pyspark-shell}"
export PYSPARK_PYTHON PYSPARK_DRIVER_PYTHON PYSPARK_SUBMIT_ARGS
# MLXLAB devbox code-disk marker. A value already provided by the startup
# script is kept as-is; the variable is only defaulted to the empty string
# when it is unset. (The previous code cleared it exactly when it was set and
# then cleared it again unconditionally further down, contradicting this
# intent.)
export MLXLAB_DEVBOX_CODE_DISK="${MLXLAB_DEVBOX_CODE_DISK-}"
# MLX / Merlin variables. They are expected to have been exported by
# start_devbox.sh at container start; they are exported again here so that
# interactive shells see them too.
# NOTE(review): MLX_USER_TOKEN is a credential hard-coded in a tracked file.
# Consider loading it from the environment or a private file, and rotating
# the value that has been committed.
export MLX_USER_TOKEN='bd2a3f1b71d54ab497f13f2eefc4f5bc'
export MLX_USER='qingyuhao'
export MLX_IMAGE_VID='d51qjdbc77u9kebl603g'
export MLX_IMAGE_URL='hub.byted.org/reckon/data.reckon.mlx.image_10786:4aa09c59c9cbb1fcde50526696177c84'
export MLX_ENGINE_SID='bgi0zldy6943b7db'
export MLX_DEVBOX_ID='132783'
export MLX_HOST_URL='https://ml.bytedance.net/'
export MERLIN_HOST_URL='https://ml.bytedance.net'
export TCC_CDN_HOST='//lf6-config.bytetcc.com/obj/tcc-config-web'
export ZTI_ENV='cn'
export IS_SEED='true'
# DOAS_ZONE is derived from BYTE_REGION: "cn" for CN, "row" for anything else
# (including an unset or empty BYTE_REGION).
case "${BYTE_REGION}" in
  CN) DOAS_ZONE="cn" ;;
  *) DOAS_ZONE="row" ;;
esac
export DOAS_ZONE
# SEC_KV_AUTH
export SEC_KV_AUTH=1
# Make sure the internal domains bypass the proxy in both NO_PROXY and
# no_proxy. Each base domain is checked individually as an exact
# comma-separated entry, and only the missing ones are prepended. (The earlier
# substring check skipped the merge as soon as any one of the domains
# appeared, leaving the others out.)
for _v in NO_PROXY no_proxy; do
  # Read the variable whose name is held in _v; bash and zsh spell indirect
  # expansion differently.
  if [[ -n "${BASH_VERSION}" ]]; then
    _cur="${!_v}"
  else
    _cur="${(P)_v}"
  fi
  _missing=""
  for _d in .byted.org byted.org .bytedance.net bytedance.net; do
    case ",${_cur}," in
      *",${_d},"*) ;; # already listed
      *) _missing="${_missing:+${_missing},}${_d}" ;;
    esac
  done
  if [[ -z "${_missing}" ]]; then
    # Nothing to add; still export so child processes inherit the value.
    export "${_v}=${_cur}"
  elif [[ -z "${_cur}" ]]; then
    export "${_v}=${_missing}"
  else
    export "${_v}=${_missing},${_cur}"
  fi
done
unset _v _cur _d _missing
# IS_SEED: disable the HTTP(S) proxies (same logic that used to live in mlxrc).
if [[ "${IS_SEED}" == "true" ]]; then
  unset http_proxy HTTP_PROXY https_proxy HTTPS_PROXY
fi
############################################################
# /tmp/krb5cc_0 symlink (moved here from mlxrc)
############################################################
# Create the link only when nothing (file or dangling link) is already at
# /tmp/krb5cc_0; a failing ln is deliberately ignored.
[[ -e /tmp/krb5cc_0 || -L /tmp/krb5cc_0 ]] \
  || ln -s /opt/tiger/.krb5cc_0 /tmp/krb5cc_0 2>/dev/null \
  || true
############################################################
# HDFS init (logic from the original prep_env.sh)
############################################################
# Skipped entirely when ~/.bashrc already sources import_hdfs_envs.sh.
# Otherwise:
#   - helper not installed      -> nothing to do, stay silent
#   - helper lives under pyenv/ -> print the manual-init hint
#   - helper found elsewhere    -> source it
# (Previously the pyenv hint was also printed when the helper was simply not
# installed.)
if ! grep -q "source import_hdfs_envs.sh" ~/.bashrc 2>/dev/null; then
  _hdfs_init="$(command -v import_hdfs_envs.sh 2>/dev/null)"
  if [[ -z "${_hdfs_init}" ]]; then
    : # import_hdfs_envs.sh is not on PATH
  elif [[ "${_hdfs_init}" == *"pyenv/"* ]]; then
    echo "pyenv env, skip hdfs init, please refer to: https://bytedance.larkoffice.com/docx/T9IrdzKRSomwOOxqbUOcRZyLnqh to do hdfs init manually"
  else
    source import_hdfs_envs.sh
  fi
  unset _hdfs_init
fi
############################################################
# Proxy
############################################################
unset HTTPS_PROXY
############################################################
# Load workspace-level environment once per session
############################################################
# Guarded by WORKSPACE_ENVS_SET so nested shells do not repeat the work.
if [[ "$WORKSPACE_ENVS_SET" != "1" ]]; then
  envfile="/etc/.container_env"
  if [[ -f $envfile ]]; then
    # Export NAME=VALUE pairs for names that are not set yet. The
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Ignore anything that is not an assignment (blank lines, bare words).
      [[ "$line" == *=* ]] || continue
      _varname="${line%%=*}"
      # Only well-formed identifiers may reach eval below; this also drops
      # comment lines and prevents injected shell code from running.
      [[ "$_varname" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue
      if eval "[[ -z \"\${${_varname}+x}\" ]]"; then # env is not set
        export "${_varname}=${line#*=}"
      fi
    done <"$envfile"
    unset _varname
  fi
  # Locate the directory of this script and load the shell-specific hadoop
  # helper (skipped when ARNOLD_WORKSPACE_BUILD_ENV is "dev").
  # NOTE(review): if this file is sourced through process substitution,
  # $0 / BASH_SOURCE would presumably name a /dev/fd-style path rather than
  # the script, so cur_dir would not be the script directory — confirm.
  if [[ -n "${ZSH_VERSION}" ]]; then
    cur_dir="$(cd "$(dirname "$0")" && pwd)"
    if [[ "$ARNOLD_WORKSPACE_BUILD_ENV" != "dev" ]]; then
      source "$cur_dir"/zsh_hadoop.sh
    fi
  else
    cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    if [[ "$ARNOLD_WORKSPACE_BUILD_ENV" != "dev" ]]; then
      source "$cur_dir"/bash_hadoop.sh
    fi
  fi
  source "$cur_dir"/common_env.sh
  # /workspace is put in front of PATH here; the PATH double-check later in
  # this file makes sure it is present as well.
  export PATH=/workspace:$PATH
  # WORKSPACE_PASSWORD: when not provided, take the text following
  # "vscode-token " from the first matching line of the process list.
  if [[ -z "${WORKSPACE_PASSWORD}" ]]; then
    export WORKSPACE_PASSWORD="$(ps -ef | grep -oP 'vscode-token\ \K.*' | head -n 1)"
  fi
  export WORKSPACE_ENVS_SET=1
fi
############################################################
# Double check PATH
# Every directory below must be on PATH (union of the directories from the
# original prep_env.sh and mlxrc). Missing ones are prepended one at a time,
# so entries later in the list end up earlier in PATH.
############################################################
for i in \
  "${HOME}/.local/bin" \
  "${HOME}/bin" \
  "/workspace" \
  "/usr/local/bvc/bin" \
  "/opt/tiger/arnold/bin" \
  "/opt/tiger/consul_deploy/bin/go" \
  "/opt/tiger/spark_deploy/spark-3.2/spark-stable/bin" \
  "/opt/mlx_deploy/miniconda3/envs/mlx/bin" \
  "/opt/tiger/mlx_deploy" \
  "/opt/tiger/ss_bin" \
  "/opt/tiger/ss_lib/bin" \
  "/opt/common_tools" \
  "/opt/tiger/yarn_deploy/hadoop/bin" \
  "/opt/tiger/yarn_deploy/hive/bin"; do
  # Wrapping both sides in ':' makes the match an exact PATH component.
  case ":${PATH}:" in
    *":${i}:"*) ;;
    *) PATH="${i}:${PATH}" ;;
  esac
done
# DEVBOX_PATH is handled separately (it may be one long path).
if [[ -n "${DEVBOX_PATH}" ]]; then
  case ":${PATH}:" in
    *":${DEVBOX_PATH}:"*) ;;
    *) PATH="${DEVBOX_PATH}:${PATH}" ;;
  esac
fi
export PATH
############################################################
# Worker-level environment (from the original prep_env.sh)
############################################################
# The per-user file takes precedence over the system-wide one; only the first
# file that exists is read.
worker_envfile="/etc/worker_envs_$ARNOLD_WORKER_ID"
user_worker_envfile="$HOME/.worker_envs/worker_envs_$ARNOLD_WORKER_ID"
_worker_env_src=""
if [[ -f "$user_worker_envfile" ]]; then
  _worker_env_src="$user_worker_envfile"
elif [[ -f "$worker_envfile" ]]; then
  _worker_env_src="$worker_envfile"
fi
if [[ -n "$_worker_env_src" ]]; then
  # Export NAME=VALUE pairs for names that are not set yet. The
  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Ignore anything that is not an assignment (blank lines, bare words).
    [[ "$line" == *=* ]] || continue
    _varname="${line%%=*}"
    # Only well-formed identifiers may reach eval below; this also drops
    # comment lines and prevents injected shell code from running.
    [[ "$_varname" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue
    if eval "[[ -z \"\${${_varname}+x}\" ]]"; then # env is not set
      export "${_varname}=${line#*=}"
    fi
  done <"$_worker_env_src"
  unset _varname
fi
unset _worker_env_src
############################################################
# PS1 (from the original prep_env.sh)
############################################################
# Only shells without ZSH_VERSION get a PS1 here. The prompt is green for a
# worker, blue for a workspace master, and plain user@host otherwise.
if [[ -n "${ZSH_VERSION}" ]]; then
  : # leave the zsh prompt alone
elif [[ -n "${ARNOLD_WORKER_ID}" ]]; then
  export PS1='\[\033[01;32m\]\u@$ARNOLD_WORKER_ID.worker:\W\[\033[00m\]\$ \[\]'
elif [[ -n "${ARNOLD_WORKSPACE_ID}" ]]; then
  export PS1='\[\033[01;34m\]\u@$ARNOLD_WORKSPACE_ID.master:\W\[\033[00m\]\$ \[\]'
else
  export PS1='\u@\h:\W\$ '
fi
############################################################
# Inherit key environment variables (PYTHONPATH, PATH, LD_LIBRARY_PATH, ...)
# from the image
############################################################
envfile="/etc/.env"
env_white_list="PYTHONPATH PATH LD_LIBRARY_PATH" # variables that get merged and de-duplicated
#######################################
# Remove repeated entries from a colon-separated list, keeping the first
# occurrence of each entry and the original order.
# Arguments: $1 - colon-separated value (may be empty)
# Outputs:   the de-duplicated list on stdout, without a trailing newline
#######################################
deduplicate_env() {
  local var_value="$1"
  # printf '%s' is used instead of echo so that no newline is appended:
  # with RS=':' a trailing newline becomes part of the last record, which
  # would keep the last entry from ever matching an earlier duplicate.
  printf '%s' "$var_value" | awk -v RS=':' '
    !seen[$0]++ {
      if (NR > 1) printf(":")
      printf("%s", $0)
    }'
}
# Merge white-listed variables from $envfile into the current environment.
# The file value is placed in front of any existing value and the result is
# passed through deduplicate_env; names outside the white list are ignored.
if [[ -f "$envfile" ]]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip comment lines and blank lines.
    if [[ "$line" =~ ^[[:space:]]*# ]] || [[ "$line" =~ ^[[:space:]]*$ ]]; then
      continue
    fi
    # Split the line into name and value at the first '='.
    var_name="${line%%=*}"
    var_value="${line#*=}"
    # Skip names that are not valid identifiers.
    if ! [[ "$var_name" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]]; then
      continue
    fi
    # Skip names that are not on the white list.
    case " $env_white_list " in
      *" $var_name "*) ;;
      *) continue ;;
    esac
    # Read the current value of the named variable; bash and zsh spell
    # indirect expansion differently.
    if [[ -n "${BASH_VERSION}" ]]; then
      existing_value="${!var_name}"
    else
      existing_value="${(P)var_name}"
    fi
    if [[ -z "$existing_value" ]]; then
      # Nothing set yet: take the file value as-is.
      export "$var_name=$var_value"
    else
      # Already set: file value first, then the existing one, de-duplicated.
      new_value="$var_value:$existing_value"
      dedup_value=$(deduplicate_env "$new_value")
      export "$var_name=$dedup_value"
    fi
  done <"$envfile"
fi
############################################################
# GPU setup (from the original prep_env.sh)
############################################################
# nvidia.sh, located next to this script, is sourced whenever
# NVIDIA_VISIBLE_DEVICES is non-empty and not the literal "none".
case "${NVIDIA_VISIBLE_DEVICES}" in
  '' | none) ;;
  *)
    # Resolve this script's directory; zsh and bash expose the sourced
    # file's path differently.
    if [[ -z "${ZSH_VERSION}" ]]; then
      cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    else
      cur_dir="$(cd "$(dirname "$0")" && pwd)"
    fi
    source "$cur_dir"/nvidia.sh
    ;;
esac