Getting md5sums from Google Drive

From time to time I pull out grind and upload the latest and greatest of my private pictures. It is nothing fancy, and it is basically unmaintained. But it works.
Some days ago, I got a request by email to help with collecting md5sums for every file in a Google Drive account. Since grind already uses md5sums to compare files, it was only a matter of picking out the right pieces. So here it is - my simple get-your-md5sums-from-google-drive Python script:
grind-md5.py download
#!/usr/bin/env python2

"""
Copyright (c) 2014 Martin Hundebøll
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

import logging
import os
import sys
import time

from apiclient import errors
from apiclient.discovery import build
import httplib2
from oauth2client.client import OAuth2WebServerFlow, FlowExchangeError, OAuth2Credentials

# Script-wide logger, named after this file's path.  The level is INFO, so
# the logger.debug() calls in this script stay silent unless it is lowered.
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

# Every record is rendered as "<time> <logger name> <level>: <message>" and
# emitted through a single stream handler attached to the logger above.
formatter = logging.Formatter(
    fmt='%(asctime)s %(name)s %(levelname)s: %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)

# OAuth 2.0 client identity handed to OAuth2WebServerFlow.
CLIENT_ID = '472054675343-83t5nuooa4u0180tf3o80j74hd0sh3pp.apps.googleusercontent.com'
CLIENT_SECRET = 'A_Jqz_0bh9nlZW_F9M7ItYOw'

# Access scope requested for the Drive account.
SCOPE = 'https://www.googleapis.com/auth/drive'

# Redirect target for the flow; the user copies the verification code from
# the browser and pastes it at the prompt.
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

# File (relative to the working directory) where the credentials are cached
# as JSON between runs.
CRED_PATH = 'creds.json'

class grind_md5(object):
    """Collect and print the MD5 checksums of the files in a Google Drive.

    The methods are meant to be called in this order:
    authenticate_saved(), authorize(), create_drive(), get_file_list(),
    build_tree() and finally print_md5().

    The class targets Python 2 (it uses raw_input, dict.iteritems and
    UTF-8 encoded byte-string paths).
    """

    # Immutable defaults; the mutable containers are created per instance in
    # __init__ so that two instances never share state.
    backoff_time = 1
    total_size = 0

    def __init__(self):
        # Seconds to sleep before retrying a failed list request.
        self.backoff_time = 1
        # Raw item dicts as returned by the Drive files().list() call.
        self.file_items = []
        # item id -> item, for every file and folder.
        self.file_index = {}
        # path -> item, for non-folder items only.
        self.file_paths = {}
        # item id -> item, for folders only.
        self.folder_index = {}
        # path -> item, for folders only.
        self.folder_paths = {}
        # Sum of the 'fileSize' values of all non-folder items.
        self.total_size = 0

    def authenticate_new(self):
        """Run the interactive OAuth flow and cache the credentials.

        Prints the authorization URL, reads the verification code from
        stdin, exchanges it for credentials (stored in self.credentials)
        and writes them as JSON to CRED_PATH.  Exits the process with
        status 1 if the code exchange fails.
        """
        self.flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, SCOPE, REDIRECT_URI)

        auth_uri = self.flow.step1_get_authorize_url()
        print('Go to the following link in your browser: ' + auth_uri)
        code = raw_input('Enter verification code: ').strip()

        try:
            self.credentials = self.flow.step2_exchange(code)
        except FlowExchangeError as e:
            logger.error("unable to authenticate: {}".format(e))
            sys.exit(1)

        # The context manager closes the file even if the write fails.
        with open(CRED_PATH, 'wb') as f:
            f.write(self.credentials.to_json())

    def authenticate_saved(self):
        """Load cached credentials from CRED_PATH into self.credentials.

        If the file cannot be read or parsed, the failure is logged and
        the interactive flow in authenticate_new() is used instead.
        """
        try:
            with open(CRED_PATH, 'rb') as f:
                saved = f.read()
            self.credentials = OAuth2Credentials.from_json(saved)
        except (IOError, ValueError) as e:
            logger.error('unable to load credentials: {}'.format(e))
            self.authenticate_new()

    def authorize(self):
        """Create an httplib2 client authorized with self.credentials."""
        h = httplib2.Http()
        self.http = self.credentials.authorize(h)

    def create_drive(self):
        """Build the Drive v2 service object and store it in self.drive.

        Exits the process with status 1 if no service object is returned.
        """
        drive = build('drive', 'v2', http=self.http)

        if drive is None:
            logger.error("Failed to create drive object")
            sys.exit(1)

        self.drive = drive

    def get_file_list(self):
        """Fetch the metadata of all non-trashed items into self.file_items.

        Results are requested page by page (up to 1000 items each).  When a
        request fails with HttpError the same page is retried after
        sleeping self.backoff_time seconds; the delay doubles after every
        failure and is reset to 1 after a success.
        NOTE: there is no retry limit, so a permanent error retries forever.
        """
        fields = [
            'createdDate',
            'downloadUrl',
            'fileExtension',
            'fileSize',
            'id',
            'kind',
            'md5Checksum',
            'mimeType',
            'modifiedByMeDate',
            'modifiedDate',
            'originalFilename',
            'title',
            'parents(id,isRoot)'
        ]

        # Only the page token differs between requests, so build the rest once.
        param = {
            'q': 'trashed=false',
            'maxResults': 1000,
            'fields': 'items(' + ','.join(fields) + '),nextPageToken'
        }
        page_token = None

        while True:
            logger.info("resolving drive files (%d files received)", len(self.file_items))

            if page_token:
                param['pageToken'] = page_token

            try:
                files = self.drive.files().list(**param).execute()
            except errors.HttpError as e:
                logger.error("Failed to receive file list from drive: %s", e)
                time.sleep(self.backoff_time)
                self.backoff_time *= 2
                continue

            # A response without an 'items' key contributes nothing.
            self.file_items += files.get('items', [])
            page_token = files.get('nextPageToken')
            self.backoff_time = 1

            if not page_token:
                break

        logger.info('resolved %d files/folders', len(self.file_items))

    def recurse_tree(self, item, path = None):
        """Return the path of *item*, built by walking up its first parents.

        item: a Drive item dict with a 'title' and, optionally, 'parents'.
        path: the already resolved path below *item* (None for the leaf).

        Titles are UTF-8 encoded and joined with os.path.join.  The walk
        stops when an item has no parents, when its first parent is the
        root, or when the parent is missing from self.file_index (logged
        as an incomplete tree); the path collected so far is returned.
        """
        while True:
            title = item['title'].encode('utf-8')
            path = os.path.join(title, path) if path else title

            # Items without a 'parents' entry are treated like parentless ones.
            parents = item.get('parents')
            if not parents or parents[0]['isRoot']:
                return path

            parent_id = parents[0]['id']
            if parent_id not in self.file_index:
                logger.error('incomplete tree: {}'.format(path))
                return path

            item = self.file_index[parent_id]

    def build_tree(self):
        """Index self.file_items by id and by resolved path.

        Every item gets a 'path' entry.  Folders are recorded in
        folder_index and folder_paths; all other items are recorded in
        file_paths and their 'fileSize' (0 when absent) is added to
        total_size.  When two items resolve to the same path a warning is
        logged and the later item replaces the earlier one.
        """
        logger.info('building drive tree')

        # The id index must be complete before any path can be resolved.
        for item in self.file_items:
            self.file_index[item['id']] = item

        for item in self.file_items:
            item_id = item['id']
            path = self.recurse_tree(item)
            item['path'] = path

            if item['mimeType'] == 'application/vnd.google-apps.folder':
                if path in self.folder_paths:
                    logger.warning('duplicate folder name: ' + path)

                self.folder_index[item_id] = item
                self.folder_paths[path] = item
                continue

            if path in self.file_paths:
                logger.warning('duplicate file path: ' + path)

            self.file_paths[path] = item
            self.total_size += int(item.get('fileSize', 0))
            logger.debug("drive file: " + path)

    def print_md5(self):
        """Print "<md5Checksum> <path>" for every file that has a checksum.

        Items in file_paths without an 'md5Checksum' entry are skipped.
        """
        for path, item in self.file_paths.iteritems():
            if 'md5Checksum' not in item:
                continue

            print('{} {}'.format(item['md5Checksum'], path))

if __name__ == "__main__":
    md5_lister = grind_md5()

    # Get credentials (cached or freshly entered) and a Drive service object.
    md5_lister.authenticate_saved()
    md5_lister.authorize()
    md5_lister.create_drive()

    # Download the item metadata, resolve the paths, then print the checksums.
    md5_lister.get_file_list()
    md5_lister.build_tree()
    md5_lister.print_md5()
Sorry for the lack of comments and the presence of bugs.