import os

# Set spark_use_local_conda to True to pack and ship the bundled local conda
# environment to the Spark workers. This is required if the workers need
# modules that are not installed in the default WMF conda base environment.
spark_use_local_conda = False

if spark_use_local_conda:
    spark_env_file = 'spark_venv.tar.gz'
    if os.path.isfile(spark_env_file):
        print(f"The conda environment is already packed in {spark_env_file}. "
              f"If you installed new dependencies, delete {spark_env_file} "
              f"first or use a different file name.")
    else:
        # Pack the currently active conda environment into an archive
        # that Spark can distribute to the workers.
        !conda pack --force --ignore-editable-packages -o {spark_env_file}

# Set the Python interpreter used on the worker machines.
if spark_use_local_conda:
    # Upload the bundled local conda environment; Spark unpacks the archive
    # as `venv` in each executor's working directory.
    os.environ['PYSPARK_SUBMIT_ARGS'] = f'--archives {spark_env_file}#venv pyspark-shell'
    os.environ['PYSPARK_PYTHON'] = 'venv/bin/python'
else:
    # Use the base WMF conda environment already installed on the workers.
    os.environ['PYSPARK_PYTHON'] = '/usr/lib/anaconda-wmf/bin/python'
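
# A minimal sketch of how the settings above are consumed, assuming pyspark
# is importable in this notebook and the cluster runs Spark on YARN (which is
# what the `--archives {file}#venv` syntax relies on). PYSPARK_SUBMIT_ARGS and
# PYSPARK_PYTHON are read when the first SparkSession starts its JVM, so the
# environment variables must be set before this point.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master('yarn')               # assumption: YARN, as on WMF analytics hosts
    .appName('conda-env-demo')    # hypothetical application name
    .getOrCreate()
)

# Sanity check: confirm which interpreter the executors were told to use.
print(os.environ['PYSPARK_PYTHON'])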