Uncategorized

Linux – Python process goes into uninterruptible sleep


Here is my Python script, which processes about 1M files, mainly to check whether these files exist:

def _write_list(path, items):
    """Write *items* to *path*, newline-separated; do nothing if *items* is empty."""
    if not items:
        return
    with open(path, "w") as out:
        # Entries are newline-separated with no trailing newline.
        out.write("\n".join(items))


def check_data_valid(
    data_root="/data_alpha/data/video/internvid/",
    save_root="/data_alpha/data/video/internvid/all_file_list/proc",
    download_list="/data_alpha/data/video/internvid/all_file_list/all_files.txt",
    checkpoint_every=200000,
):
    """Classify every entry of a download list by what exists on disk.

    Each non-blank line of *download_list* is a path relative to *data_root*.
    An entry is recorded as:

    * valid   -- the path with ".mp4" replaced by ".avi" exists;
    * error   -- only the path as listed exists;
    * missing -- neither path exists.

    Progress snapshots named ``valid_<count>_<i>.txt``,
    ``error_<count>_<i>.txt`` and ``missing_<count>_<i>.txt`` are written to
    *save_root* during the scan, and ``valid_total.txt``, ``error_total.txt``
    and ``missing_total.txt`` are written at the end. A file is only written
    when its list is non-empty.

    Args:
        data_root: Directory the listed paths are relative to.
        save_root: Directory receiving the result files; created if absent.
        download_list: Text file with one relative path per line.
        checkpoint_every: Snapshot interval. Snapshots are taken at indices
            ``i`` where ``i % checkpoint_every == 1``; a falsy value disables
            them.

    Raises:
        OSError: If *download_list* cannot be read or a result file cannot
            be written.
    """
    # Local import keeps the function self-contained; no module-level
    # import of ``os`` is visible alongside this definition.
    import os

    # Read the whole list first and close the input before the long scan,
    # so the handle is neither held open for hours nor shadowed by outputs.
    with open(download_list, "r") as listing:
        # Blank lines are dropped: an empty entry would resolve to data_root
        # itself and be misreported as an "error" file.
        downloads = [
            entry for entry in (line.strip() for line in listing) if entry
        ]

    # Fail fast on an unusable output directory instead of after the scan.
    os.makedirs(save_root, exist_ok=True)

    valid_data = []
    err_data = []
    missing_data = []

    for i, item in enumerate(downloads):
        file = os.path.join(data_root, item)
        avi = file.replace(".mp4", ".avi")
        if os.path.exists(avi):
            valid_data.append(item)
        elif os.path.exists(file):
            err_data.append(item)
        else:
            missing_data.append(item)

        if checkpoint_every and i % checkpoint_every == 1:
            # All three snapshot names use the same index ``i``.
            _write_list(
                os.path.join(save_root, f"valid_{len(valid_data)}_{i}.txt"),
                valid_data,
            )
            _write_list(
                os.path.join(save_root, f"error_{len(err_data)}_{i}.txt"),
                err_data,
            )
            _write_list(
                os.path.join(save_root, f"missing_{len(missing_data)}_{i}.txt"),
                missing_data,
            )

    _write_list(os.path.join(save_root, "valid_total.txt"), valid_data)
    _write_list(os.path.join(save_root, "error_total.txt"), err_data)
    _write_list(os.path.join(save_root, "missing_total.txt"), missing_data)



# Script entry point: run the scan only when this file is executed directly.
if __name__ == "__main__":
    check_data_valid()

But after running for several hours and processing about 100k files (judging by the files in save_root), the process goes into state D (as shown by the ps command), which means uninterruptible sleep. I wonder why this happens and how to avoid it.



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *