Sync event videos to Google Drive

If you've made a patch to quick fix a bug or to add a new feature not yet in the main tree then post it here so others can try it out.
Post Reply
montagdude
Posts: 91
Joined: Fri Nov 10, 2017 6:05 pm

Sync event videos to Google Drive

Post by montagdude »

I like that ZoneMinder saves videos in local storage, but lately I decided to also create a contingency plan for the potential situation where the ZoneMinder server is stolen during a break-in. This is a Python script using PyDrive that syncs your local videos with cloud storage on Google Drive. You need to have a Google Drive account and set up your own Google API project as described in the PyDrive documentation. For one thing, in order to avoid interactive webserver authentication every time, you need to create a settings.yaml file as described here.

It assumes that you have a folder in Google Drive where you want the videos to go. You need to set the Drive folder ID in the code: open the folder in a browser, copy the ID from the end of the URL, and paste it into the script. There are some other assumptions in the script relating to your local storage directories and the video file format, so please read the comments carefully and edit as appropriate if you wish to use it.

Code: Select all

#!/usr/bin/env python3
#
# Syncs ZoneMinder event videos to Google Drive

import os
import sys
import time
from googleapiclient.errors import HttpError
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

def get_drive_files(drive, folder_id):
    '''Returns a dict of files in a Google Drive folder by filename:id.

    drive is the GoogleDrive object used for the listing and folder_id is the ID
    of the Drive folder to list (trashed entries are excluded by the query).
    If the listing raises HttpError, the error is written to stderr and an
    empty dict is returned.'''
    query = "'{:s}' in parents and trashed=false".format(folder_id)
    try:
        files = drive.ListFile({'q': query}).GetList()
    except HttpError as err:
        # err is an exception object, not a string: it must be formatted.
        # Concatenating it with "\n" raises TypeError inside the handler.
        sys.stderr.write("{}\n".format(err))
        return {}

    # Return just the title and id in a dict
    return {entry['title']: entry['id'] for entry in files}

def upload_file(drive, folder_id, file_path):
    '''Uploads a file to the Google Drive folder.

    The file is created on Drive under the base name of file_path, with
    folder_id as its parent. An HttpError raised during the upload is written
    to stderr instead of being propagated; nothing is returned, so the caller
    cannot tell whether the upload succeeded.'''
    filename = os.path.basename(file_path)
    try:
        f = drive.CreateFile({'title': filename, 'parents': [{'id': folder_id}]})
        f.SetContentFile(file_path)
        f.Upload()
    except HttpError as err:
        # Format the exception; "err + '\n'" would raise TypeError here.
        sys.stderr.write("{}\n".format(err))

def delete_file(drive, file_id):
    '''Deletes a file by ID from Google Drive.

    An HttpError raised by the delete call is written to stderr instead of
    being propagated; nothing is returned.'''
    f = drive.CreateFile({'id': file_id})
    try:
        f.Delete()
    except HttpError as err:
        # Format the exception; "err + '\n'" would raise TypeError here.
        sys.stderr.write("{}\n".format(err))


if __name__ == "__main__":
    # The ID is in the url when opening the folder in the browser
    drive_folder_id = "EDIT ME"

    # How often to check for new files (in seconds)
    cycle_time = 20

    # Top-level ZoneMinder events directory
    events_dir = os.path.join("/var", "cache", "zoneminder", "events")

    # Subdirectories to search (recursively) for new events
    events_subdirs = ["1", "2", "3", "4", "5", "6", "7", "8"]

    # Authentication to Google Drive API
    gauth = GoogleAuth()

    # Rather than using local webserver authentication, which requires user interaction
    # every time, we're using the settings.yaml method to automatically authenticate.
    # See: https://pythonhosted.org/PyDrive/oauth.html#authentication-in-two-lines
    #gauth.LocalWebserverAuth()

    # Google Drive object
    drive = GoogleDrive(gauth)

    # Dict of files in the Drive folder
    drive_files = get_drive_files(drive, drive_folder_id)

    # Check for new files and upload them
    time_last = time.time()
    while True:
        # Update current time and time elapsed since last cycle
        time_now = time.time()
        deltatime = time_now - time_last

        # Sleep for the remaining portion of the cycle time
        if deltatime < cycle_time:
            time.sleep(cycle_time-deltatime)

        # Update last time checked now. If the subsequent uploads take longer than the cycle time,
        # it will check for new files immediately on the next loop iteration.
        time_last = time.time()

        # os.walk() silently yields nothing for a missing directory. Without this guard, a
        # missing events directory would make every Drive file look stale and the deletion
        # pass below would wipe the whole backup.
        if not os.path.isdir(events_dir):
            sys.stderr.write("Events directory {:s} not found. Skipping this cycle.\n"
                             .format(events_dir))
            continue

        # Search for new video files and upload
        any_uploaded = False
        local_files = set()
        for subdir in events_subdirs:
            for root, dirs, files in os.walk(os.path.join(events_dir, subdir)):
                for file in files:
                    if not file.endswith(".mp4"):
                        continue
                    file_path = os.path.join(root, file)
                    try:
                        mtime = os.path.getmtime(file_path)
                    except OSError:
                        # The file disappeared between the directory listing and the stat
                        # call, so it is no longer a local file.
                        continue
                    local_files.add(file)
                    # If the modification time is less than 5 seconds ago, don't upload it
                    # yet, because the event may still be going on.
                    if time.time() - mtime < 5.0:
                        ename = file.split("-")[0]
                        print("Event {:s} may still be in progress. Skipping.".format(ename))
                        sys.stdout.flush()
                        continue
                    if file not in drive_files:
                        # Upload new video file
                        sys.stdout.write("Uploading {:s}...".format(file))
                        sys.stdout.flush()
                        upload_file(drive, drive_folder_id, file_path)
                        sys.stdout.write("Done.\n")
                        sys.stdout.flush()
                        any_uploaded = True

        # Remove any files from Google Drive that are no longer present in local storage
        any_deleted = False
        for fname, fid in drive_files.items():
            if fname not in local_files:
                sys.stdout.write("Deleting {:s}...".format(fname))
                sys.stdout.flush()
                delete_file(drive, fid)
                sys.stdout.write("Done.\n")
                sys.stdout.flush()
                any_deleted = True

        # Refresh the list of files on Google Drive
        if any_uploaded or any_deleted:
            drive_files = get_drive_files(drive, drive_folder_id)
montagdude
Posts: 91
Joined: Fri Nov 10, 2017 6:05 pm

Re: Sync event videos to Google Drive

Post by montagdude »

I made a couple improvements to this script:
  • Generic try/except instead of trying to catch specific errors. There are too many different ways the Google API calls can fail while trying to upload or delete a file, and it was too tedious and not really worthwhile to attempt to catch them all individually.
  • Switch to pydrive2, since pydrive has apparently ceased development.

Code: Select all

#!/usr/bin/env python3
#
# Syncs ZoneMinder event videos to Google Drive

import os
import sys
import time
from googleapiclient.errors import HttpError
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive

def get_drive_files(drive, folder_id):
    '''Returns a dict of files in a Google Drive folder by filename:id.

    drive is the GoogleDrive object used for the listing and folder_id is the ID
    of the Drive folder to list (trashed entries are excluded by the query).
    If the listing fails, a message is written to stderr and an empty dict is
    returned.'''
    query = "'{:s}' in parents and trashed=false".format(folder_id)
    try:
        files = drive.ListFile({'q': query}).GetList()
    except Exception as err:
        # "except Exception" rather than a bare "except", so that KeyboardInterrupt and
        # SystemExit still stop the script. The cause is reported as well.
        sys.stderr.write("An error occurred while getting list of files.\n")
        sys.stderr.write("{}\n".format(err))
        return {}

    # Return just the title and id in a dict
    return {entry['title']: entry['id'] for entry in files}

def upload_file(drive, folder_id, file_path):
    '''Uploads a file to the Google Drive folder.

    The file is created on Drive under the base name of file_path, with
    folder_id as its parent. A failure is reported on stderr instead of being
    propagated; nothing is returned, so the caller cannot tell whether the
    upload succeeded.'''
    filename = os.path.basename(file_path)
    try:
        f = drive.CreateFile({'title': filename, 'parents': [{'id': folder_id}]})
        f.SetContentFile(file_path)
        f.Upload()
    except Exception as err:
        # "except Exception" rather than a bare "except", so that KeyboardInterrupt and
        # SystemExit still stop the script. The cause is reported as well.
        sys.stderr.write("An error occurred while uploading the file.\n")
        sys.stderr.write("{}\n".format(err))

def delete_file(drive, file_id):
    '''Deletes a file by ID from Google Drive.

    A failure is reported on stderr instead of being propagated; nothing is
    returned.'''
    f = drive.CreateFile({'id': file_id})
    try:
        f.Delete()
    except Exception as err:
        # "except Exception" rather than a bare "except", so that KeyboardInterrupt and
        # SystemExit still stop the script. The cause is reported as well.
        sys.stderr.write("An error occurred while deleting the file.\n")
        sys.stderr.write("{}\n".format(err))


if __name__ == "__main__":
    # The ID is in the url when opening the folder in the browser
    drive_folder_id = "EDIT ME"

    # How often to check for new files (in seconds)
    cycle_time = 20

    # Top-level ZoneMinder events directory
    events_dir = os.path.join("/var", "cache", "zoneminder", "events")

    # Subdirectories to search (recursively) for new events
    events_subdirs = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]

    # Authentication to Google Drive API
    gauth = GoogleAuth()

    # Rather than using local webserver authentication, which requires user interaction
    # every time, we're using the settings.yaml method to automatically authenticate.
    # See: https://pythonhosted.org/PyDrive/oauth.html#authentication-in-two-lines
    #gauth.LocalWebserverAuth()

    # Google Drive object
    drive = GoogleDrive(gauth)

    # Dict of files in the Drive folder
    drive_files = get_drive_files(drive, drive_folder_id)

    # Check for new files and upload them
    time_last = time.time()
    while True:
        # Update current time and time elapsed since last cycle
        time_now = time.time()
        deltatime = time_now - time_last

        # Sleep for the remaining portion of the cycle time
        if deltatime < cycle_time:
            time.sleep(cycle_time-deltatime)

        # Update last time checked now. If the subsequent uploads take longer than the cycle time,
        # it will check for new files immediately on the next loop iteration.
        time_last = time.time()

        # os.walk() silently yields nothing for a missing directory. Without this guard, a
        # missing events directory would make every Drive file look stale and the deletion
        # pass below would wipe the whole backup.
        if not os.path.isdir(events_dir):
            sys.stderr.write("Events directory {:s} not found. Skipping this cycle.\n"
                             .format(events_dir))
            continue

        # Search for new video files and upload
        any_uploaded = False
        local_files = set()
        for subdir in events_subdirs:
            for root, dirs, files in os.walk(os.path.join(events_dir, subdir)):
                for file in files:
                    if not file.endswith(".mp4"):
                        continue
                    file_path = os.path.join(root, file)
                    try:
                        mtime = os.path.getmtime(file_path)
                    except OSError:
                        # The file disappeared between the directory listing and the stat
                        # call, so it is no longer a local file.
                        continue
                    local_files.add(file)
                    # If the modification time is less than 5 seconds ago, don't upload it
                    # yet, because the event may still be going on.
                    if time.time() - mtime < 5.0:
                        ename = file.split("-")[0]
                        print("Event {:s} may still be in progress. Skipping.".format(ename))
                        sys.stdout.flush()
                        continue
                    if file not in drive_files:
                        # Upload new video file
                        sys.stdout.write("Uploading {:s}...".format(file))
                        sys.stdout.flush()
                        upload_file(drive, drive_folder_id, file_path)
                        sys.stdout.write("Done.\n")
                        sys.stdout.flush()
                        any_uploaded = True

        # Remove any files from Google Drive that are no longer present in local storage
        any_deleted = False
        for fname, fid in drive_files.items():
            if fname not in local_files:
                sys.stdout.write("Deleting {:s}...".format(fname))
                sys.stdout.flush()
                delete_file(drive, fid)
                sys.stdout.write("Done.\n")
                sys.stdout.flush()
                any_deleted = True

        # Refresh the list of files on Google Drive
        if any_uploaded or any_deleted:
            drive_files = get_drive_files(drive, drive_folder_id)
I also found out that if the Google Cloud project you set up to use the API is left in "Testing" status, the refresh token will expire every 7 days, and you'll be forced to do the interactive web authentication again. You need to publish your project to remove that restriction.
montagdude
Posts: 91
Joined: Fri Nov 10, 2017 6:05 pm

Re: Sync event videos to Google Drive

Post by montagdude »

I added the capability to purge old files when exceeding a user-defined storage space limit, in MB. It works basically like ZoneMinder's PurgeWhenFull filter. This is useful if your Google Drive storage allocation is less than your ZoneMinder disk space, which it probably is for most people, as you can just set it to start purging at a little bit less than your total allocation (depending on how much other stuff you store on Google Drive).

See the max_storage_MB and temp_excess_MB variables and comments. Note there are a bunch of functions near the top of the script. The settings you may need or want to change if you want to use this are between lines 122 and 142. Specifically, they are:
  • drive_folder_id
  • cycle_time
  • max_storage_MB
  • temp_excess_MB
  • events_dir
  • events_subdirs
It probably would be better to put these in a config file and throw the whole project up on Github, but I'm lazy. If I do, I'll let you know.

Code: Select all

#!/usr/bin/env python3
#
# Syncs ZoneMinder event videos to Google Drive

import os
import sys
import time
from datetime import datetime, timezone
from collections import OrderedDict
from copy import copy
from googleapiclient.errors import HttpError
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive

def get_drive_files(drive, folder_id, max_retries=5):
    '''Returns the files in a Google Drive folder and their total size.

    The return value is a tuple (filedict, totalMB), where filedict maps
    filename: {'id': fileID, 'date': modified_date, 'size': fileSizeMB} and
    totalMB is the sum of the listed sizes in MB.

    The listing is attempted up to max_retries times, with a 2 second pause
    between attempts. If every attempt fails, a message is written to stderr
    and the script exits with status 1.'''
    query = "'{:s}' in parents and trashed=false".format(folder_id)
    attempt = 1
    while True:
        try:
            files = drive.ListFile({'q': query}).GetList()
            break
        except Exception:
            # "except Exception" rather than a bare "except", so that KeyboardInterrupt
            # still stops the script while it is retrying.
            print("An error occurred while getting list of files.")
        # ">=" so that max_retries < 1 gives up after the first failure instead of
        # retrying forever.
        if attempt >= max_retries:
            sys.stderr.write("Failed to get list of files.\n")
            sys.exit(1)
        # Only announce a retry, and only wait, when another attempt will really be made.
        print("Retrying {:d} more times.".format(max_retries-attempt))
        time.sleep(2)
        attempt += 1

    # Make a dict containing the file info we want
    filedict = {}
    totalMB = 0.
    for file in files:
        # NOTE(review): the Drive v2 API appears to omit 'fileSize' for entries with no
        # stored content (e.g. sub-folders) -- confirm. Such entries cannot be event
        # videos, so they are left out rather than raising KeyError.
        size_bytes = file.get('fileSize')
        if size_bytes is None:
            continue
        fsizeMB = float(size_bytes)/2**20
        totalMB += fsizeMB
        # Convert timestamp to datetime. See the following about replacing "Z" with +00:00 for
        # compatibility with Python < 3.11:
        # https://stackoverflow.com/questions/75867446/documentation-example-for-datetime-fromisoformat-raises-invalid-isoformat-string
        fdate = datetime.fromisoformat(file['modifiedDate'].replace("Z","+00:00"))
        filedict[file['title']] = {'id': file['id'], 'date': fdate, 'size': fsizeMB}

    return filedict, totalMB

def sort_drive_files(drive_files, events_dir, events_subdirs):
    '''Sorts google drive files by modification date, newest first.

    drive_files maps filename to a dict with at least a 'date' entry. For every
    file found under events_dir/<subdir> whose name is a key of drive_files, the
    'date' entry is overwritten in place with the modification time of the file
    on disk (as a UTC datetime). Returns a new OrderedDict in descending date
    order.'''

    # Get modification date on hard drive
    for subdir in events_subdirs:
        for root, dirs, files in os.walk(os.path.join(events_dir, subdir)):
            for file in files:
                # Direct dict membership test; building a list of keys for every file on
                # disk made this loop quadratic.
                if file not in drive_files:
                    continue
                try:
                    mtime = os.path.getmtime(os.path.join(root, file))
                except OSError:
                    # The file disappeared after being listed: keep the Drive date.
                    continue
                drive_files[file]['date'] = datetime.fromtimestamp(mtime, tz=timezone.utc)

    # Sort by modified date in descending order
    # https://stackoverflow.com/questions/8031418/how-to-sort-ordereddict-of-ordereddict
    # Here item[1] is the item's value (the per-file dict), as opposed to the key, item[0]
    return OrderedDict(sorted(drive_files.items(), key=lambda item: item[1]['date'],
                              reverse=True))

def upload_file(drive, folder_id, file_path):
    '''Uploads a file to the Google Drive folder and returns status.

    The file is created on Drive under the base name of file_path, with
    folder_id as its parent. Returns True when the upload completed and False
    when it raised an error (which is reported on stderr).'''
    filename = os.path.basename(file_path)
    try:
        f = drive.CreateFile({'title': filename, 'parents': [{'id': folder_id}]})
        f.SetContentFile(file_path)
        f.Upload()
    except Exception as err:
        # "except Exception" rather than a bare "except", so that KeyboardInterrupt and
        # SystemExit still stop the script. The cause is reported as well.
        sys.stderr.write("An error occurred while uploading the file.\n")
        sys.stderr.write("{}\n".format(err))
        return False

    return True

def delete_file(drive, file_id):
    '''Deletes a file by ID from Google Drive and returns status.

    Returns True when the delete call completed and False when it raised an
    error (which is reported on stderr).'''
    f = drive.CreateFile({'id': file_id})
    try:
        f.Delete()
    except Exception as err:
        # "except Exception" rather than a bare "except", so that KeyboardInterrupt and
        # SystemExit still stop the script. The cause is reported as well.
        sys.stderr.write("An error occurred while deleting the file.\n")
        sys.stderr.write("{}\n".format(err))
        return False

    return True

def purge_old_files(drive, drive_files, totalMB, max_storage_MB):
    '''Deletes old files so that total size doesn't exceed a specified maximum.

    drive_files must be ordered by modified date in descending order (newest
    first); it is not modified. Files are deleted from the oldest end until the
    running total drops below max_storage_MB. A file whose deletion fails is
    kept in the result and still counted in the total.

    Returns the updated ordered dict of files and total storage size in MB.'''
    new_drive_files = copy(drive_files)
    new_totalMB = totalMB

    # Iterate through the dict in reverse order (it is sorted by modified date in descending order),
    # deleting the oldest files until we are under the limit.
    for fname in reversed(drive_files):
        # Check before deleting, so that a call made while already under the limit
        # removes nothing.
        if new_totalMB < max_storage_MB:
            break
        entry = new_drive_files[fname]
        if delete_file(drive, entry['id']):
            new_drive_files.pop(fname)
            sys.stdout.write("Purged {:s}\n".format(fname))
            sys.stdout.flush()
            new_totalMB -= entry['size']

    return new_drive_files, new_totalMB

if __name__ == "__main__":
    # The ID is in the url when opening the folder in the browser
    drive_folder_id = "EDIT ME"

    # How often to check for new files (in seconds)
    cycle_time = 20

    # Max storage allowed for all ZoneMinder videos uploaded to Google Drive. When uploading new
    # videos, the max may be temporarily exceeded by temp_excess_MB. After uploading new videos, if
    # the total storage is above the max, this script will start deleting the oldest videos to bring
    # the total back within the limit. Be sure to set the sum of these plus whatever other files you
    # may have on Google Drive low enough so that you don't exceed your account storage limit, or
    # else you will encounter upload errors.
    max_storage_MB = 30000.
    temp_excess_MB = 200.

    # Top-level ZoneMinder events directory
    events_dir = os.path.join("/var", "cache", "zoneminder", "events")

    # Subdirectories to search (recursively) for new events
    events_subdirs = ["10", "12", "14", "16", "18"]

    # Authentication to Google Drive API
    gauth = GoogleAuth()

    # Rather than using local webserver authentication, which requires user interaction
    # every time, we're using the settings.yaml method to automatically authenticate.
    # See: https://pythonhosted.org/PyDrive/oauth.html#authentication-in-two-lines
    #gauth.LocalWebserverAuth()

    # Google Drive object
    drive = GoogleDrive(gauth)

    # Dict of files in the Drive folder
    drive_files, totalMB = get_drive_files(drive, drive_folder_id)
    drive_files = sort_drive_files(drive_files, events_dir, events_subdirs)

    # Check for new files and upload them, while purging old and deleted files
    time_last = time.time()
    while True:
        # Update current time and time elapsed since last cycle
        time_now = time.time()
        deltatime = time_now - time_last

        # Sleep for the remaining portion of the cycle time
        if deltatime < cycle_time:
            time.sleep(cycle_time-deltatime)

        # Update last time checked now. If the subsequent uploads take longer than the cycle time,
        # it will check for new files immediately on the next loop iteration.
        time_last = time.time()

        # os.walk() silently yields nothing for a missing directory. Without this guard, a
        # missing events directory would make every Drive file look stale and the deletion
        # pass below would wipe the whole backup.
        if not os.path.isdir(events_dir):
            sys.stderr.write("Events directory {:s} not found. Skipping this cycle.\n"
                             .format(events_dir))
            continue

        # Find video files that aren't on Google Drive
        any_uploaded = False
        local_files = set()
        files_to_upload = []
        for subdir in events_subdirs:
            for root, dirs, files in os.walk(os.path.join(events_dir, subdir)):
                for file in files:
                    if not file.endswith(".mp4"):
                        continue
                    file_path = os.path.join(root, file)
                    try:
                        info = os.stat(file_path)
                    except OSError:
                        # The file disappeared between the directory listing and the stat
                        # call, so it is no longer a local file.
                        continue
                    mtime = info.st_mtime
                    local_files.add(file)
                    # If the modification time is less than 5 seconds ago, don't upload it
                    # yet, because the event may still be going on.
                    if time.time() - mtime < 5.0:
                        ename = file.split("-")[0]
                        print("Event {:s} may still be in progress. Skipping.".format(ename))
                        sys.stdout.flush()
                        continue
                    if file not in drive_files:
                        file_date = datetime.fromtimestamp(mtime, tz=timezone.utc)
                        file_size_MB = info.st_size/2**20
                        files_to_upload.append((file_date, file_path, file_size_MB, file))

        # Sort list of files by upload date in descending order
        files_to_upload.sort(reverse=True)

        # Reduce upload list so that we don't exceed storage space
        if not drive_files:
            oldest_date = datetime.fromtimestamp(time.time(), tz=timezone.utc)
        else:
            # drive_files is sorted newest first, so its last key is the oldest file
            oldest_date = drive_files[next(reversed(drive_files))]['date']
        new_files_to_upload = []
        MB_to_upload = 0.
        for candidate in files_to_upload:
            file_date = candidate[0]
            MB_to_upload += candidate[2]
            # No uploads can exceed max storage + temp excess
            if MB_to_upload + totalMB > max_storage_MB + temp_excess_MB:
                break
            # Old files shouldn't dig into temp excess, or we'll get a continuous cycle of
            # uploading old files and then purging them.
            if file_date < oldest_date and MB_to_upload + totalMB > max_storage_MB:
                break
            new_files_to_upload.append(candidate)

        # Finally, upload files. The list is walked in reverse so that the oldest are uploaded
        # first. This will make the modification time on Google Drive closer to what it should be.
        for file_date, file_path, file_size_MB, file_name in reversed(new_files_to_upload):
            sys.stdout.write("Uploading {:s}...".format(file_name))
            sys.stdout.flush()
            if upload_file(drive, drive_folder_id, file_path):
                sys.stdout.write("Done.\n")
                totalMB += file_size_MB
                any_uploaded = True
            else:
                # Terminate the "Uploading ..." line so the next message starts cleanly
                sys.stdout.write("Failed.\n")
            sys.stdout.flush()

        # Remove any files from Google Drive that are no longer present in local storage
        any_deleted = False
        for fname, fdict in drive_files.items():
            if fname not in local_files:
                sys.stdout.write("Deleting {:s}...".format(fname))
                sys.stdout.flush()
                if delete_file(drive, fdict['id']):
                    sys.stdout.write("Done.\n")
                    totalMB -= fdict['size']
                    any_deleted = True
                else:
                    # Terminate the "Deleting ..." line so the next message starts cleanly
                    sys.stdout.write("Failed.\n")
                sys.stdout.flush()

        # Refresh the list of files on Google Drive
        if any_uploaded or any_deleted:
            drive_files, totalMB = get_drive_files(drive, drive_folder_id)
            drive_files = sort_drive_files(drive_files, events_dir, events_subdirs)

        # Purge old files if up against the limit
        if totalMB >= max_storage_MB:
            drive_files, totalMB = purge_old_files(drive, drive_files, totalMB, max_storage_MB)
Post Reply