EDIT: I've posted the proper way to do this in my other answer: drop the data into Google Storage first. That way you never end up with results that are too large to pull down.
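For reference, a minimal sketch of that Google Storage route, written in the same BigQuery v2 API style as the code below. The destination URI is a hypothetical placeholder, and it assumes a service object like the one built by `create_service()` further down:

```python
def export_table_to_gcs(oService, sProjectID, dTable, sGcsUri):
    "kick off a BigQuery extract job that writes a table out to Google Storage"
    dJob = {
        'configuration': {
            'extract': {
                'sourceTable': dTable,  # {'projectId':..., 'datasetId':..., 'tableId':...}
                'destinationUris': [sGcsUri],  # e.g. 'gs://my-bucket/result-*.csv' (placeholder)
                'destinationFormat': 'CSV',
            }
        }
    }
    return oService.jobs().insert(projectId=sProjectID, body=dJob).execute()
```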
Well, I couldn't find a way to do this directly with pandas, so I had to write a little extra on top of the plain API. Here is my fix (and most of the work needed to do it natively without pandas):
```python
sProjectID = "project-id"
sQuery = '''
    SELECT column1, column2
    FROM [dataset_name.tablename]
'''
df = create_dataframe(sQuery, sProjectID, bLargeResults=True)


# *******Functions to make above work*********

def create_dataframe(sQuery, sProjectID, bLargeResults=False):
    "takes a BigQuery sql query and returns a Pandas dataframe"
    if bLargeResults:
        oService = create_service()
        dDestinationTable = run_query(sQuery, oService, sProjectID)
        df = pandas_get_table(dDestinationTable)
    else:
        df = pandas_query(sQuery, sProjectID)
    return df


def pandas_query(sQuery, sProjectID):
    "go into bigquery with an sql query and return a dataframe"
    from pandas.io import gbq
    df = gbq.read_gbq(sQuery, sProjectID)
    return df


def pandas_get_table(dTable):
    "fetch a whole table and return a dataframe"
    from pandas.io import gbq
    sProjectID = dTable['projectId']
    sDatasetID = dTable['datasetId']
    sTableID = dTable['tableId']
    sQuery = "SELECT * FROM [{}.{}]".format(sDatasetID, sTableID)
    df = gbq.read_gbq(sQuery, sProjectID)
    return df


def create_service():
    "create the BigQuery service using application default credentials"
    from oauth2client.client import GoogleCredentials
    from apiclient.discovery import build
    credentials = GoogleCredentials.get_application_default()
    oService = build('bigquery', 'v2', credentials=credentials)
    return oService


def run_query(sQuery, oService, sProjectID):
    "run the query into a destination table and wait for it to finish"
    import time
    dQuery = {
        'configuration': {
            'query': {
                # allowLargeResults requires an explicit destination table;
                # the 'sandbox' dataset must already exist in your project
                'writeDisposition': 'WRITE_TRUNCATE',  # the valid API value ('OVERWRITE' is not)
                'useQueryCache': False,
                'allowLargeResults': True,
                'query': sQuery,
                'destinationTable': {
                    'projectId': sProjectID,
                    'datasetId': 'sandbox',
                    'tableId': 'api_large_result_dropoff',
                },
            }
        }
    }
    job = oService.jobs().insert(projectId=sProjectID, body=dQuery).execute()

    # wait until the job is DONE before reading the destination table,
    # otherwise the table may not exist (or be incomplete) yet
    sJobID = job['jobReference']['jobId']
    while True:
        dStatus = oService.jobs().get(projectId=sProjectID, jobId=sJobID).execute()
        if dStatus['status']['state'] == 'DONE':
            break
        time.sleep(1)

    return job['configuration']['query']['destinationTable']
```
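Since `allowLargeResults` forces the results into a real destination table, you may also want to drop that temporary table once the dataframe is loaded. A minimal sketch, assuming the same `oService` object and the default names used above:

```python
# optional cleanup: drop the temporary destination table once the
# dataframe has been pulled (names match the defaults used above)
oService.tables().delete(
    projectId=sProjectID,
    datasetId='sandbox',
    tableId='api_large_result_dropoff',
).execute()
```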