#!/bin/bash
############################################################
# Interactive-shell environment setup for an MLX / Merlin devbox.
#
# This file is written to be *sourced* (it relies on BASH_SOURCE / $0 to
# locate sibling scripts) and contains branches for both bash and zsh.
# It merges logic that previously lived in mlxrc, start_devbox and
# prep_env.sh (per the section headers below).
############################################################

############################################################
# alias
############################################################
alias launch='mlx worker launch'
alias run='launch'

##################
# history
##################
export HISTSIZE=100000
export HISTFILESIZE=200000
# Append to the history file when the terminal closes instead of overwriting it.
[[ -n "${BASH_VERSION}" ]] && shopt -s histappend
# Ignore duplicate commands and commands that start with a space.
export HISTCONTROL=ignoreboth
# Ignore specific frequently used commands (disabled).
# export HISTIGNORE="&:[bf]g:exit:pwd:clear:history"
# Add a timestamp to every history entry.
export HISTTIMEFORMAT="%F %T "
# Write history after every command so multiple terminals share it in real
# time (disabled). Note: this may have a slight performance impact.
# export PROMPT_COMMAND="history -a; history -c; history -r; $PROMPT_COMMAND"

##################
# set ulimit
##################
ulimit -n 1024768

############################################################
# IGNOREEOF compatibility
############################################################
# bash: IGNOREEOF controls how many consecutive Ctrl-D are ignored before exit.
export IGNOREEOF=2
# zsh: the corresponding setopt.
if [[ -n "${ZSH_VERSION}" ]]; then
  setopt IGNORE_EOF 2>/dev/null || true
fi

############################################################
# MLX / Merlin environment (merged from mlxrc & start_devbox)
############################################################

# Kerberos credential cache location (keeps an inherited value if present).
export KRB5CCNAME="${KRB5CCNAME:-FILE:/opt/tiger/.krb5cc_${UID}}"

# MLX / Notebook PYTHONPATH:
# put /mlx/workspace:/tmp/mlx/workspace:/opt/tiger:/opt/tiger/lite_sdk in
# front of any existing PYTHONPATH and make sure the mlx_notebook_pysdk path
# is present at the end.
if [[ -z "${PYTHONPATH}" ]]; then
  export PYTHONPATH="/mlx/workspace:/tmp/mlx/workspace:/opt/tiger:/opt/tiger/lite_sdk:/opt/tiger/mlx_notebook_pysdk/mlx-pysdk"
else
  export PYTHONPATH="/mlx/workspace:/tmp/mlx/workspace:/opt/tiger:/opt/tiger/lite_sdk:${PYTHONPATH}:/opt/tiger/mlx_notebook_pysdk/mlx-pysdk"
fi

# pyspark
export PYSPARK_PYTHON="${PYSPARK_PYTHON:-/usr/bin/python3}"
export PYSPARK_DRIVER_PYTHON="${PYSPARK_DRIVER_PYTHON:-/usr/bin/python3}"
# Defaults to local[2], consistent with the original mlxrc.
export PYSPARK_SUBMIT_ARGS="${PYSPARK_SUBMIT_ARGS:---master local[2] pyspark-shell}"

# MLXLAB devbox code-disk marker: if the startup script already set it, do
# NOT override it; otherwise export it as empty.
# (The previous code tested -n instead of -z and later re-exported '' again
# unconditionally, so an inherited value was always wiped.)
export MLXLAB_DEVBOX_CODE_DISK="${MLXLAB_DEVBOX_CODE_DISK:-}"

# MLX / Merlin variables (assumed to have been exported by start_devbox.sh at
# container start); exported again here so interactive shells inherit them.
# NOTE(review): MLX_USER_TOKEN is a credential hard-coded in a shell rc file;
# consider loading it from a protected file or the inherited environment.
export MLX_USER_TOKEN='bd2a3f1b71d54ab497f13f2eefc4f5bc'
export MLX_USER='qingyuhao'
export MLX_IMAGE_VID='d51qjdbc77u9kebl603g'
export MLX_IMAGE_URL='hub.byted.org/reckon/data.reckon.mlx.image_10786:4aa09c59c9cbb1fcde50526696177c84'
export MLX_ENGINE_SID='bgi0zldy6943b7db'
export MLX_DEVBOX_ID='132783'
export MLX_HOST_URL='https://ml.bytedance.net/'
export MERLIN_HOST_URL='https://ml.bytedance.net'
export TCC_CDN_HOST='//lf6-config.bytetcc.com/obj/tcc-config-web'
export ZTI_ENV='cn'
export IS_SEED='true'

# DOAS_ZONE (derived from BYTE_REGION)
if [[ "${BYTE_REGION}" == "CN" ]]; then
  export DOAS_ZONE="cn"
else
  export DOAS_ZONE="row"
fi

# SEC_KV_AUTH
export SEC_KV_AUTH=1

# Complete NO_PROXY / no_proxy with the base domain list.
_base_no_proxy=".byted.org,byted.org,.bytedance.net,bytedance.net"
for _v in NO_PROXY no_proxy; do
  # Indirect read of the variable named by $_v (bash vs zsh syntax).
  if [[ -n "${BASH_VERSION}" ]]; then
    _cur="${!_v}"
  else
    _cur="${(P)_v}"
  fi
  if [[ -z "${_cur}" ]]; then
    export "${_v}=${_base_no_proxy}"
  elif [[ "${_cur}" == *".byted.org"* && "${_cur}" == *".bytedance.net"* ]]; then
    # Both base domains are already present: keep the value, just export it.
    export "${_v}=${_cur}"
  else
    # At least one base domain is missing: prepend the whole base list.
    export "${_v}=${_base_no_proxy},${_cur}"
  fi
done
unset _v _cur _base_no_proxy

# IS_SEED: disable proxies (same logic that used to be written into mlxrc).
if [[ "${IS_SEED}" == "true" ]]; then
  unset http_proxy HTTP_PROXY https_proxy HTTPS_PROXY
fi

############################################################
# /tmp/krb5cc_0 symlink (moved here from mlxrc)
############################################################
# Best effort: failures (e.g. no permission) are deliberately ignored.
if [[ ! -e /tmp/krb5cc_0 && ! -L /tmp/krb5cc_0 ]]; then
  ln -s /opt/tiger/.krb5cc_0 /tmp/krb5cc_0 2>/dev/null || true
fi

############################################################
# for hdfs (logic from the original prep_env.sh)
############################################################
# Skipped entirely when ~/.bashrc already sources import_hdfs_envs.sh.
if ! grep -q "source import_hdfs_envs.sh" ~/.bashrc 2>/dev/null; then
  _hdfs_script="$(command -v import_hdfs_envs.sh 2>/dev/null)"
  if [[ -n "${_hdfs_script}" && "${_hdfs_script}" != *"pyenv/"* ]]; then
    source import_hdfs_envs.sh
  else
    echo "pyenv env, skip hdfs init, please refer to: https://bytedance.larkoffice.com/docx/T9IrdzKRSomwOOxqbUOcRZyLnqh to do hdfs init manually"
  fi
  unset _hdfs_script
fi

############################################################
# proxy
############################################################
unset HTTPS_PROXY

#######################################
# Export every NAME=VALUE line of a file whose NAME is not already set.
# Lines without '=', and lines whose NAME is not a valid identifier (blank
# lines, comments, garbage) are skipped. A final line without a trailing
# newline is still processed.
# Arguments: $1 - path of the env file; a missing file is a no-op
# Returns:   0
#######################################
_load_env_defaults() {
  local _file="$1" _line _name
  [[ -f "${_file}" ]] || return 0
  while IFS= read -r _line || [[ -n "${_line}" ]]; do
    [[ "${_line}" == *=* ]] || continue
    _name="${_line%%=*}"
    [[ "${_name}" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue
    # eval is only reached with a validated identifier; it is kept because it
    # is an "is this variable set?" test that works in both bash and zsh.
    if eval "[[ -z \"\${${_name}+x}\" ]]"; then
      export "${_name}=${_line#*=}"
    fi
  done <"${_file}"
  return 0
}

############################################################
# Load workspace-level env once
############################################################
if [[ "$WORKSPACE_ENVS_SET" != "1" ]]; then
  envfile="/etc/.container_env"
  _load_env_defaults "${envfile}"

  # Locate this file's directory and source the shell-specific hadoop setup
  # (skipped when ARNOLD_WORKSPACE_BUILD_ENV is "dev").
  if [[ -n "${ZSH_VERSION}" ]]; then
    cur_dir="$(cd "$(dirname "$0")" && pwd)"
    if [[ "$ARNOLD_WORKSPACE_BUILD_ENV" != "dev" ]]; then
      source "$cur_dir"/zsh_hadoop.sh
    fi
  else
    cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    if [[ "$ARNOLD_WORKSPACE_BUILD_ENV" != "dev" ]]; then
      source "$cur_dir"/bash_hadoop.sh
    fi
  fi
  source "$cur_dir"/common_env.sh

  # PATH=/workspace:$PATH was set here originally; the PATH double-check
  # below makes sure /workspace is in PATH as well.
  export PATH=/workspace:$PATH

  # WORKSPACE_PASSWORD: taken from the text following "vscode-token " in the
  # process list when not already set.
  if [[ -z "${WORKSPACE_PASSWORD}" ]]; then
    export WORKSPACE_PASSWORD="$(ps -ef | grep -oP 'vscode-token\ \K.*' | head -n 1)"
  fi

  export WORKSPACE_ENVS_SET=1
fi

############################################################
# Double check PATH
# Make sure every directory below is in PATH (merged list from the original
# prep_env.sh + mlxrc). Missing entries are prepended, so later entries in
# this list end up earlier in PATH.
############################################################
for _dir in \
  "$HOME/.local/bin" \
  "$HOME/bin" \
  "/workspace" \
  "/usr/local/bvc/bin" \
  "/opt/tiger/arnold/bin" \
  "/opt/tiger/consul_deploy/bin/go" \
  "/opt/tiger/spark_deploy/spark-3.2/spark-stable/bin" \
  "/opt/mlx_deploy/miniconda3/envs/mlx/bin" \
  "/opt/tiger/mlx_deploy" \
  "/opt/tiger/ss_bin" \
  "/opt/tiger/ss_lib/bin" \
  "/opt/common_tools" \
  "/opt/tiger/yarn_deploy/hadoop/bin" \
  "/opt/tiger/yarn_deploy/hive/bin"; do
  if [[ ":$PATH:" != *":${_dir}:"* ]]; then
    PATH="${_dir}:$PATH"
  fi
done
unset _dir

# DEVBOX_PATH is handled separately (it may be a long path).
if [[ -n "${DEVBOX_PATH}" && ":$PATH:" != *":$DEVBOX_PATH:"* ]]; then
  PATH="${DEVBOX_PATH}:$PATH"
fi

export PATH

############################################################
# worker-level env (from the original prep_env.sh)
############################################################
# The per-user file takes precedence; the /etc file is only read when the
# per-user file does not exist.
worker_envfile="/etc/worker_envs_$ARNOLD_WORKER_ID"
user_worker_envfile="$HOME/.worker_envs/worker_envs_$ARNOLD_WORKER_ID"
if [[ -f "${user_worker_envfile}" ]]; then
  _load_env_defaults "${user_worker_envfile}"
elif [[ -f "${worker_envfile}" ]]; then
  _load_env_defaults "${worker_envfile}"
fi

############################################################
# PS1 (from the original prep_env.sh) - bash only
############################################################
if [[ -z "${ZSH_VERSION}" ]]; then
  if [[ -n "${ARNOLD_WORKER_ID}" ]]; then
    export PS1='\[\033[01;32m\]\u@$ARNOLD_WORKER_ID.worker:\W\[\033[00m\]\$ \[\]'
  elif [[ -n "${ARNOLD_WORKSPACE_ID}" ]]; then
    export PS1='\[\033[01;34m\]\u@$ARNOLD_WORKSPACE_ID.master:\W\[\033[00m\]\$ \[\]'
  else
    export PS1='\u@\h:\W\$ '
  fi
fi

############################################################
# Inherit key variables (PYTHONPATH, PATH, LD_LIBRARY_PATH) from the image
############################################################
envfile="/etc/.env"
# Variables that are merged from the file and de-duplicated.
env_white_list="PYTHONPATH PATH LD_LIBRARY_PATH"

#######################################
# De-duplicate a colon-separated list, keeping the first occurrence of each
# element and the original order.
# Arguments: $1 - colon-separated value
# Outputs:   the de-duplicated value on stdout (no trailing newline)
#######################################
deduplicate_env() {
  local var_value="$1"
  # printf '%s' (not echo): a trailing newline would become part of the last
  # awk record, so the last element would never match an earlier duplicate.
  printf '%s' "$var_value" |
    awk -v RS=':' '!a[$0]++ { if (NR > 1) printf(":"); printf("%s", $0) }'
}

if [[ -f "$envfile" ]]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip comment lines and blank lines.
    [[ "$line" =~ ^[[:space:]]*# ]] && continue
    [[ "$line" =~ ^[[:space:]]*$ ]] && continue

    # Split into variable name and value.
    var_name="${line%%=*}"
    var_value="${line#*=}"

    # Skip invalid variable names.
    [[ "$var_name" =~ ^[a-zA-Z_][a-zA-Z0-9_]*$ ]] || continue

    if [[ " $env_white_list " == *" $var_name "* ]]; then
      # Whitelisted variable: read the current value indirectly
      # (bash vs zsh syntax).
      if [[ -n "${BASH_VERSION}" ]]; then
        existing_value="${!var_name}"
      else
        existing_value="${(P)var_name}"
      fi

      if [[ -n "$existing_value" ]]; then
        # Put the file's value in front of the current one, then de-duplicate.
        new_value="$var_value:$existing_value"
        dedup_value=$(deduplicate_env "$new_value")
        export "$var_name=$dedup_value"
      else
        export "$var_name=$var_value"
      fi
    fi
  done <"$envfile"
fi

############################################################
# GPU (from the original prep_env.sh)
############################################################
# Source nvidia.sh from this file's directory when NVIDIA_VISIBLE_DEVICES is
# set to something other than 'none'.
if [[ -n $NVIDIA_VISIBLE_DEVICES && $NVIDIA_VISIBLE_DEVICES != 'none' ]]; then
  if [[ -n "${ZSH_VERSION}" ]]; then
    cur_dir="$(cd "$(dirname "$0")" && pwd)"
  else
    cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  fi
  source "$cur_dir"/nvidia.sh
fi