#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Copyright (c) 2014 Martin Hundebøll
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import logging
import os
import sys
import time

import httplib2
from apiclient import errors
from apiclient.discovery import build
from oauth2client.client import OAuth2WebServerFlow, FlowExchangeError, OAuth2Credentials
# Script-wide logger, named after this file's path and limited to INFO and up.
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

# Every record goes through a single stream handler using a timestamped
# "time name LEVEL: message" layout.
formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
# OAuth2 client identity handed to OAuth2WebServerFlow in authenticate_new().
# NOTE(review): the client secret is committed in source -- confirm this is
# acceptable for this client, or load it from configuration instead.
CLIENT_ID = '472054675343-83t5nuooa4u0180tf3o80j74hd0sh3pp.apps.googleusercontent.com'
CLIENT_SECRET = 'A_Jqz_0bh9nlZW_F9M7ItYOw'
# Scope requested during authorization.
SCOPE = 'https://www.googleapis.com/auth/drive'
# Out-of-band redirect: the user is shown a verification code in the browser
# and types it back into this script (see authenticate_new()).
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
# File the credentials JSON is written to and read from. The path is relative,
# so it resolves against the current working directory.
CRED_PATH = 'creds.json'
class grind_md5(object):
    """Collect the non-trashed files of a Google Drive and print their MD5 sums.

    The methods are meant to be called in this order: authenticate_saved(),
    authorize(), create_drive(), get_file_list(), build_tree(), print_md5().

    The code targets Python 2 (it relies on raw_input and dict.iteritems).
    """

    # Seconds to wait before retrying a failed list request; doubled after
    # each failure and reset to 1 after each successful page.
    backoff_time = 1
    # Upper bound for backoff_time so repeated failures cannot grow the
    # delay without limit.
    max_backoff_time = 64
    # Sum of the 'fileSize' values of all non-folder items seen by build_tree().
    total_size = 0

    def __init__(self):
        """Create empty per-instance containers.

        The containers are created here rather than as class attributes so
        that separate instances do not share (and mutate) the same objects.
        """
        self.file_items = []      # raw items returned by the list requests
        self.file_index = {}      # item id -> item (files and folders)
        self.file_paths = {}      # path -> item, non-folder items only
        self.folder_index = {}    # item id -> item, folders only
        self.folder_paths = {}    # path -> item, folders only

    def authenticate_new(self):
        """Run the interactive OAuth2 flow and store the resulting credentials.

        Prints the authorization URL, reads the verification code from stdin,
        exchanges it for credentials and writes them as JSON to CRED_PATH.
        Exits the process with status 1 when the exchange fails.
        """
        self.flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, SCOPE, REDIRECT_URI)
        auth_uri = self.flow.step1_get_authorize_url()
        print('Go to the following link in your browser: ' + auth_uri)
        code = raw_input('Enter verification code: ').strip()
        try:
            self.credentials = self.flow.step2_exchange(code)
        except FlowExchangeError as e:
            logger.error("unable to authenticate: %s", e)
            sys.exit(1)
        # The context manager closes the file even if the write fails.
        with open(CRED_PATH, 'wb') as f:
            f.write(self.credentials.to_json())

    def authenticate_saved(self):
        """Load credentials from CRED_PATH, falling back to authenticate_new().

        The fallback is taken when the file cannot be read or its content
        cannot be turned into credentials.
        """
        try:
            with open(CRED_PATH, 'rb') as f:
                serialized = f.read()
            self.credentials = OAuth2Credentials.from_json(serialized)
        except (IOError, ValueError, KeyError) as e:
            # KeyError: well-formed JSON that lacks an expected field.
            logger.error('unable to load credentials: {}'.format(e))
            self.authenticate_new()

    def authorize(self):
        """Create an httplib2 client authorized with the loaded credentials."""
        h = httplib2.Http()
        self.http = self.credentials.authorize(h)

    def create_drive(self):
        """Build the Drive v2 service object on top of the authorized client.

        Exits the process with status 1 if no service object is returned.
        """
        drive = build('drive', 'v2', http=self.http)
        if drive is None:
            logger.error("Failed to create drive object")
            sys.exit(1)
        self.drive = drive

    def get_file_list(self):
        """Fetch all non-trashed items page by page into self.file_items.

        A failed request is logged and retried after sleeping backoff_time
        seconds; the delay doubles per failure (capped at max_backoff_time)
        and is reset after every successful page.
        """
        fields = [
            'createdDate',
            'downloadUrl',
            'fileExtension',
            'fileSize',
            'id',
            'kind',
            'md5Checksum',
            'mimeType',
            'modifiedByMeDate',
            'modifiedDate',
            'originalFilename',
            'title',
            'parents(id,isRoot)',
        ]
        # Only 'pageToken' changes between requests, so build the rest once.
        param = {
            'q': 'trashed=false',
            'maxResults': 1000,
            'fields': 'items(' + ','.join(fields) + '),nextPageToken',
        }
        while True:
            logger.info("resolving drive files ({} files received)".format(len(self.file_items)))
            try:
                files = self.drive.files().list(**param).execute()
            except errors.HttpError as e:
                logger.error("Failed to receive file list from drive: {}".format(e))
                time.sleep(self.backoff_time)
                self.backoff_time = min(self.backoff_time * 2, self.max_backoff_time)
                # param is untouched, so the same page is requested again.
                continue
            self.backoff_time = 1
            self.file_items.extend(files.get('items', []))
            page_token = files.get('nextPageToken')
            if not page_token:
                break
            param['pageToken'] = page_token
        logger.info('resolved {} files/folders'.format(len(self.file_items)))

    def recurse_tree(self, item, path=None):
        """Return the path of *item* by walking up through its first parent.

        item -- a file/folder dict with 'title' and, optionally, 'parents'.
        path -- the path collected so far below *item* (used by the recursion).

        The walk stops at an item without parents, at an item whose first
        parent is flagged 'isRoot', or (with an error logged) at a parent id
        that is missing from self.file_index. Titles are UTF-8 encoded
        before being joined.
        """
        title = item['title'].encode('utf-8')
        path = os.path.join(title, path) if path else title
        # A missing 'parents' key is treated like an empty parent list.
        parents = item.get('parents')
        if not parents or parents[0].get('isRoot'):
            return path
        parent_id = parents[0]['id']
        if parent_id not in self.file_index:
            logger.error('incomplete tree: {}'.format(path))
            return path
        return self.recurse_tree(self.file_index[parent_id], path)

    def build_tree(self):
        """Resolve a path for every fetched item and fill the lookup tables.

        Stores the path in item['path'], registers folders in folder_index /
        folder_paths and other items in file_paths, and adds each non-folder
        item's 'fileSize' to total_size. A path seen twice is logged as a
        warning and the later item replaces the earlier one.
        """
        logger.info('building drive tree')
        # Index everything first so recurse_tree() can look up any parent.
        for item in self.file_items:
            self.file_index[item['id']] = item
        for item in self.file_items:
            path = self.recurse_tree(item)
            item['path'] = path
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                if path in self.folder_paths:
                    logger.warning('duplicate folder name: %s', path)
                self.folder_index[item['id']] = item
                self.folder_paths[path] = item
                continue
            if path in self.file_paths:
                logger.warning('duplicate file path: %s', path)
            self.file_paths[path] = item
            # Items without a 'fileSize' entry count as 0 bytes.
            self.total_size += int(item.get('fileSize', 0))
            logger.debug("drive file: %s", path)

    def print_md5(self):
        """Print '<md5Checksum> <path>' for every file that has a checksum."""
        for path, item in self.file_paths.iteritems():
            if 'md5Checksum' not in item:
                continue
            print('{} {}'.format(item['md5Checksum'], path))
if __name__ == "__main__":
    # Script entry point: sign in, connect to Drive, fetch the listing,
    # resolve paths and print the checksums. The steps run in this exact
    # order because each one uses state set up by the one before it.
    grinder = grind_md5()
    pipeline = (
        grinder.authenticate_saved,
        grinder.authorize,
        grinder.create_drive,
        grinder.get_file_list,
        grinder.build_tree,
        grinder.print_md5,
    )
    for step in pipeline:
        step()