# test_download.py
# Integration tests for the download endpoints.
import cStringIO
import os
import tarfile
import zipfile


def test_download(data_builder, file_form, as_admin, api_db):
    """Exercise ticket-based batch and bulk downloads via ``/download``.

    Scenarios, in order: unknown ticket, tag-filtered project download,
    download of two acquisitions from one session, redeeming a ticket whose
    stored IP was changed, nonexistent containers, and the ``bulk`` variants
    (invalid container name, missing file, successful symlink download).

    All parameters are pytest fixtures supplied by the test suite.
    """
    project = data_builder.create_project(label='project1')
    session = data_builder.create_session(label='session1')
    session2 = data_builder.create_session(label='session1')
    acquisition = data_builder.create_acquisition(session=session)
    acquisition2 = data_builder.create_acquisition(session=session2)

    # upload the same file to each container created and use different tags to
    # facilitate download filter tests:
    # acquisition: [], session: ['plus'], project: ['plus', 'minus']
    file_name = 'test.csv'
    uploads = [
        ('/acquisitions/' + acquisition, {'name': file_name, 'type': 'csv'}),
        ('/acquisitions/' + acquisition2, {'name': file_name, 'type': 'csv'}),
        ('/sessions/' + session, {'name': file_name, 'type': 'csv', 'tags': ['plus']}),
        ('/projects/' + project, {'name': file_name, 'type': 'csv', 'tags': ['plus', 'minus']}),
    ]
    for container_url, meta in uploads:
        r = as_admin.post(container_url + '/files', files=file_form(file_name, meta=meta))
        # Fail here rather than with a confusing archive mismatch further down.
        assert r.ok

    missing_object_id = '000000000000000000000000'

    # Try to download w/ nonexistent ticket
    r = as_admin.get('/download', params={'ticket': missing_object_id})
    assert r.status_code == 404

    # Retrieve a ticket for a batch download, filtering on the 'minus' tag
    r = as_admin.post('/download', json={
        'optional': False,
        'filters': [{'tags': {
            '-': ['minus']
        }}],
        'nodes': [
            {'level': 'project', '_id': project},
        ]
    })
    assert r.ok
    ticket = r.json()['ticket']

    # Perform the download
    r = as_admin.get('/download', params={'ticket': ticket})
    assert r.ok

    # Read the member names inside a context manager so the archive is closed
    # even when one of the assertions below fails.
    with tarfile.open(mode="r", fileobj=cStringIO.StringIO(r.content)) as tar:
        member_names = tar.getnames()

    # Every member must be the uploaded file, and at least one path must
    # contain 'session1_0' (presumably the folder given to the second session
    # that shares the label 'session1' -- confirm against the download code).
    for name in member_names:
        assert os.path.basename(name) == file_name
    assert any('session1_0' in name for name in member_names)

    # Download two acquisitions of the same session and make sure none of the
    # files end up under the 'session1_0' folder.
    acquisition3 = data_builder.create_acquisition(session=session)
    r = as_admin.post('/acquisitions/' + acquisition3 + '/files', files=file_form(
        file_name, meta={'name': file_name, 'type': 'csv'}))
    assert r.ok

    r = as_admin.post('/download', json={
        'optional': False,
        'nodes': [
            {'level': 'acquisition', '_id': acquisition},
            {'level': 'acquisition', '_id': acquisition3},
        ]
    })
    assert r.ok
    ticket = r.json()['ticket']

    # Perform the download
    r = as_admin.get('/download', params={'ticket': ticket})
    assert r.ok

    with tarfile.open(mode="r", fileobj=cStringIO.StringIO(r.content)) as tar:
        member_names = tar.getnames()

    # Guard against an empty archive, which would make the negative check
    # below pass vacuously.
    assert member_names
    for name in member_names:
        assert os.path.basename(name) == file_name
        assert 'session1_0' not in name

    # Try to perform the download from a different IP
    update_result = api_db.downloads.update_one(
        {'_id': ticket},
        {'$set': {'ip': '0.0.0.0'}})
    assert update_result.modified_count == 1

    r = as_admin.get('/download', params={'ticket': ticket})
    assert r.status_code == 400

    # Try to retrieve a ticket referencing nonexistent containers
    r = as_admin.post('/download', json={
        'optional': False,
        'nodes': [
            {'level': 'project', '_id': missing_object_id},
            {'level': 'session', '_id': missing_object_id},
            {'level': 'acquisition', '_id': missing_object_id},
        ]
    })
    assert r.status_code == 404

    # Try to retrieve ticket for bulk download w/ invalid container name
    # (not project|session|acquisition)
    r = as_admin.post('/download', params={'bulk': 'true'}, json={
        'files': [{'container_name': 'subject', 'container_id': missing_object_id, 'filename': 'nosuch.csv'}]
    })
    assert r.status_code == 400

    # Try to retrieve ticket for bulk download referencing nonexistent file
    r = as_admin.post('/download', params={'bulk': 'true'}, json={
        'files': [{'container_name': 'project', 'container_id': project, 'filename': 'nosuch.csv'}]
    })
    assert r.status_code == 404

    # Retrieve ticket for bulk download
    r = as_admin.post('/download', params={'bulk': 'true'}, json={
        'files': [{'container_name': 'project', 'container_id': project, 'filename': file_name}]
    })
    assert r.ok
    ticket = r.json()['ticket']

    # Perform the download using symlinks
    r = as_admin.get('/download', params={'ticket': ticket, 'symlinks': 'true'})
    assert r.ok


def test_filelist_download(data_builder, file_form, as_admin):
    """Exercise single-file downloads from a session's ``/files`` endpoint.

    Uploads a plain ``one.csv`` and a real zip ``two.zip`` (containing
    ``two.csv``) to a session, then checks: missing file, hash mismatch,
    ticket issue and redemption, zip ``info``/``member`` requests on a
    non-zip and on a zip file, and reuse of a ticket for a different file.

    All parameters are pytest fixtures supplied by the test suite.
    """
    session = data_builder.create_session()

    # Build an in-memory zip archive holding a single member, two.csv
    zip_cont = cStringIO.StringIO()
    with zipfile.ZipFile(zip_cont, 'w') as zip_file:
        zip_file.writestr('two.csv', 'sample\ndata\n')
    zip_cont.seek(0)

    session_files = '/sessions/' + session + '/files'

    # Check the setup uploads so a failed upload is reported here instead of
    # as an unexpected status code in the requests below.
    r = as_admin.post(session_files, files=file_form('one.csv'))
    assert r.ok
    r = as_admin.post(session_files, files=file_form(('two.zip', zip_cont)))
    assert r.ok

    # try to get non-existent file
    r = as_admin.get(session_files + '/non-existent.csv')
    assert r.status_code == 404

    # try to get file w/ non-matching hash
    r = as_admin.get(session_files + '/one.csv', params={'hash': 'match me if you can'})
    assert r.status_code == 409

    # get download ticket for single file (an empty 'ticket' param requests one)
    r = as_admin.get(session_files + '/one.csv', params={'ticket': ''})
    assert r.ok
    ticket = r.json()['ticket']

    # download single file w/ ticket
    r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket})
    assert r.ok

    # try to get zip info for non-zip file
    r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket, 'info': 'true'})
    assert r.status_code == 400

    # try to get zip member of non-zip file
    r = as_admin.get(session_files + '/one.csv', params={'ticket': ticket, 'member': 'hardly'})
    assert r.status_code == 400

    # try to download a different file w/ ticket
    r = as_admin.get(session_files + '/two.zip', params={'ticket': ticket})
    assert r.status_code == 400

    # get download ticket for zip file
    r = as_admin.get(session_files + '/two.zip', params={'ticket': ''})
    assert r.ok
    ticket = r.json()['ticket']

    # get zip info
    r = as_admin.get(session_files + '/two.zip', params={'ticket': ticket, 'info': 'true'})
    assert r.ok

    # try to get non-existent zip member
    r = as_admin.get(session_files + '/two.zip', params={'ticket': ticket, 'member': 'hardly'})
    assert r.status_code == 400

    # get zip member
    r = as_admin.get(session_files + '/two.zip', params={'ticket': ticket, 'member': 'two.csv'})
    assert r.ok


def _check_single_file_downloads(as_admin, files_url):
    """Run the single-file ticket and zip checks against one files URL.

    ``files_url`` is expected to expose a non-zip ``one.csv`` and a zip
    ``two.zip`` whose only member is ``two.csv``. The leading underscore
    keeps pytest from collecting this helper as a test.
    """
    # try to get download ticket for non-existent analysis file
    r = as_admin.get(files_url + '/non-existent.csv')
    assert r.status_code == 404

    # get analysis download ticket for single file
    r = as_admin.get(files_url + '/one.csv', params={'ticket': ''})
    assert r.ok
    ticket = r.json()['ticket']

    # download single analysis file w/ ticket
    r = as_admin.get(files_url + '/one.csv', params={'ticket': ticket})
    assert r.ok

    # try to get zip info for non-zip file
    r = as_admin.get(files_url + '/one.csv', params={'ticket': ticket, 'info': 'true'})
    assert r.status_code == 400

    # try to get zip member of non-zip file
    r = as_admin.get(files_url + '/one.csv', params={'ticket': ticket, 'member': 'nosuch'})
    assert r.status_code == 400

    # try to download a different file w/ ticket
    r = as_admin.get(files_url + '/two.zip', params={'ticket': ticket})
    assert r.status_code == 400

    # get analysis download ticket for zip file
    r = as_admin.get(files_url + '/two.zip', params={'ticket': ''})
    assert r.ok
    ticket = r.json()['ticket']

    # get zip info
    r = as_admin.get(files_url + '/two.zip', params={'ticket': ticket, 'info': 'true'})
    assert r.ok

    # try to get non-existent zip member
    r = as_admin.get(files_url + '/two.zip', params={'ticket': ticket, 'member': 'nosuch'})
    assert r.status_code == 400

    # get zip member
    r = as_admin.get(files_url + '/two.zip', params={'ticket': ticket, 'member': 'two.csv'})
    assert r.ok


def test_analysis_download(data_builder, file_form, as_admin, default_payload):
    """Exercise batch and single-file downloads of analysis files.

    Two analyses are created on one session:

    * ``analysis1`` -- created by uploading files directly; used for the
      ticket checks on the analysis ``/files`` endpoints.
    * ``analysis`` -- created with a job whose inputs are acquisition files;
      used to check that its files are flagged as inputs and land in an
      ``input`` path inside the downloaded tar.

    All parameters are pytest fixtures supplied by the test suite.
    """
    session = data_builder.create_session()
    acquisition = data_builder.create_acquisition()

    # NOTE(review): this mutates the dict held by the default_payload fixture
    # in place; if that fixture is shared across tests the change leaks into
    # them -- confirm the fixture's scope before relying on it elsewhere.
    gear_doc = default_payload['gear']['gear']
    gear_doc['inputs'] = {
        'csv': {
            'base': 'file'
        },
        'zip': {
            'base': 'file'
        }
    }
    gear = data_builder.create_gear(gear=gear_doc)

    assert as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form('one.csv')).ok
    assert as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form('two.zip')).ok

    # Build an in-memory zip archive holding a single member, two.csv
    zip_cont = cStringIO.StringIO()
    with zipfile.ZipFile(zip_cont, 'w') as zip_file:
        zip_file.writestr('two.csv', 'sample\ndata\n')
    zip_cont.seek(0)

    # analysis for testing most of the download functionality
    # analysis_files and new_analysis_files refer to this analysis
    r = as_admin.post('/sessions/' + session + '/analyses', files=file_form(
        'one.csv', ('two.zip', zip_cont),
        meta={'label': 'test', 'inputs': [{'name': 'one.csv'}, {'name': 'two.csv'}]}
    ))
    # Check the response before reading '_id' so a failed request is reported
    # as such rather than as a KeyError or JSON decoding error.
    assert r.ok
    analysis1 = r.json()['_id']

    # Analysis only for testing that inputs are in their own folder
    r = as_admin.post('/sessions/' + session + '/analyses',
        json={
            'analysis': {'label': 'test'},
            'job': {
                'gear_id': gear,
                'inputs': {
                    'csv': {
                        'name': 'one.csv',
                        'type': 'acquisition',
                        'id': acquisition
                    },
                    'zip': {
                        'name': 'two.zip',
                        'type': 'acquisition',
                        'id': acquisition
                    }
                }
            }
        },
        params={'job':True}
    )
    assert r.ok
    analysis = r.json()['_id']
    analysis_files = '/sessions/' + session + '/analyses/' + analysis1 + '/files'
    new_analysis_files = '/analyses/' + analysis1 + '/files'

    # Check that analysis files are labelled as inputs
    r = as_admin.get('/sessions/' + session + '/analyses/' + analysis)
    assert r.ok
    assert r.json().get('files')[0].get('input')

    # try to download analysis files w/ non-existent ticket
    r = as_admin.get(analysis_files, params={'ticket': '000000000000000000000000'})
    assert r.status_code == 404

    # get analysis batch download ticket for all files
    r = as_admin.get(analysis_files, params={'ticket': ''}, json={"optional":True,"nodes":[{"level":"analysis","_id":analysis1}]})
    assert r.ok
    ticket = r.json()['ticket']

    # filename is analysis_<label> not analysis_<_id>
    assert r.json()['filename'] == 'analysis_test.tar'

    # batch download analysis files w/ ticket from wrong endpoint
    r = as_admin.get(analysis_files, params={'ticket': ticket})
    assert r.status_code == 400

    # batch download analysis files w/ ticket from correct endpoint
    r = as_admin.get('/download', params={'ticket': ticket})
    assert r.ok

    ### Using '/download' endpoint ###
    # try to download analysis files w/ non-existent ticket
    r = as_admin.get('/download', params={'ticket': '000000000000000000000000'})
    assert r.status_code == 404

    # get analysis batch download ticket for all files
    r = as_admin.get('/download', params={'ticket': ''}, json={"optional":True,"nodes":[{"level":"analysis","_id":analysis}]})
    assert r.ok
    ticket = r.json()['ticket']

    # filename is analysis_<label> not analysis_<_id>
    assert r.json()['filename'] == 'analysis_test.tar'

    # batch download analysis files w/ ticket
    r = as_admin.get('/download', params={'ticket': ticket})
    assert r.ok

    # Check to make sure files are in tar; the context manager closes the
    # archive even when one of the assertions below fails.
    with tarfile.open(mode="r", fileobj=cStringIO.StringIO(r.content)) as tar:
        member_names = tar.getnames()
    assert len(member_names) == 2
    for name in member_names:
        assert os.path.basename(name) in ['one.csv', 'two.zip']
        assert 'input' in name

    ### single file analysis download via the session-scoped URL ###
    _check_single_file_downloads(as_admin, analysis_files)

    ### single file analysis download using FileListHandler ###
    _check_single_file_downloads(as_admin, new_analysis_files)

    # delete session analysis (job)
    r = as_admin.delete('/sessions/' + session + '/analyses/' + analysis)
    assert r.ok
    r = as_admin.delete('/sessions/' + session + '/analyses/' + analysis1)
    assert r.ok

def test_filters(data_builder, file_form, as_admin):
    """Check the ``filters`` option of ``POST /download``.

    Uploads files with different types and tags to two acquisitions, then
    verifies the reported ``file_cnt`` for: a malformed filter (400), no
    filter, a tag filter, a type filter, and the ``'null'`` type filter
    after uploading a file without a type.

    All parameters are pytest fixtures supplied by the test suite.
    """
    # The project id itself is not needed below, only the created container.
    # NOTE(review): presumably it becomes the parent of the session and
    # acquisitions created next -- confirm against data_builder.
    data_builder.create_project()
    session = data_builder.create_session()
    acquisition = data_builder.create_acquisition()
    acquisition2 = data_builder.create_acquisition()

    # Check the setup uploads: the file counts asserted below depend on all
    # of them having succeeded.
    r = as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form(
        "test.csv", meta={'name': "test.csv", 'type': 'csv', 'tags': ['red', 'blue']}))
    assert r.ok
    r = as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form(
        'test.dicom', meta={'name': 'test.dicom', 'type': 'dicom', 'tags': ['red']}))
    assert r.ok
    r = as_admin.post('/acquisitions/' + acquisition2 + '/files', files=file_form(
        'test.nifti', meta={'name': 'test.nifti', 'type': 'nifti'}))
    assert r.ok
    r = as_admin.get('/acquisitions/' + acquisition)
    assert r.ok

    # Malformed filters (tags given as a plain string instead of a +/- mapping)
    r = as_admin.post('/download', json={
        'optional': False,
        'filters': [
            {'tags': 'red'}
        ],
        'nodes': [
            {'level': 'session', '_id': session},
        ]
    })
    assert r.status_code == 400

    # No filters
    r = as_admin.post('/download', json={
        'optional': False,
        'nodes': [
            {'level': 'session', '_id': session},
        ]
    })
    assert r.ok
    assert r.json()['file_cnt'] == 3

    # Filter by tags
    r = as_admin.post('/download', json={
        'optional': False,
        'filters': [
            {'tags': {'+':['red']}}
        ],
        'nodes': [
            {'level': 'session', '_id': session},
        ]
    })
    assert r.ok
    assert r.json()['file_cnt'] == 2

    # Filter by type; first add a file whose metadata carries no type
    r = as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form(
        "test", meta={'name': "test", 'tags': ['red', 'blue']}))
    assert r.ok
    r = as_admin.post('/download', json={
        'optional': False,
        'filters': [
            {'types': {'+':['nifti']}}
        ],
        'nodes': [
            {'level': 'session', '_id': session},
        ]
    })
    assert r.ok
    assert r.json()['file_cnt'] == 1

    # The 'null' type filter is expected to match exactly one file
    r = as_admin.post('/download', json={
        'optional': False,
        'filters': [
            {'types': {'+':['null']}}
        ],
        'nodes': [
            {'level': 'session', '_id': session},
        ]
    })
    assert r.ok
    assert r.json()['file_cnt'] == 1

def test_summary(data_builder, as_admin, file_form):
    """Check the per-type file counts returned by ``POST /download/summary``.

    Uploads the same csv file to a project, one of its sessions and two
    acquisitions, then requests summaries at project, session, acquisition
    and analysis level, plus an unsupported ``group`` level (400).

    All parameters are pytest fixtures supplied by the test suite.
    """
    project = data_builder.create_project(label='project1')
    session = data_builder.create_session(label='session1')
    session2 = data_builder.create_session(label='session1')
    acquisition = data_builder.create_acquisition(session=session)
    acquisition2 = data_builder.create_acquisition(session=session2)

    # upload the same file to each container created; the counts asserted
    # below depend on every one of these uploads having succeeded
    file_name = 'test.csv'
    uploads = [
        ('/acquisitions/' + acquisition, {'name': file_name, 'type': 'csv'}),
        ('/acquisitions/' + acquisition2, {'name': file_name, 'type': 'csv'}),
        ('/sessions/' + session, {'name': file_name, 'type': 'csv', 'tags': ['plus']}),
        ('/projects/' + project, {'name': file_name, 'type': 'csv', 'tags': ['plus', 'minus']}),
    ]
    for container_url, meta in uploads:
        r = as_admin.post(container_url + '/files', files=file_form(file_name, meta=meta))
        assert r.ok

    missing_object_id = '000000000000000000000000'

    # Project level: all four uploaded files are counted
    r = as_admin.post('/download/summary', json=[{"level":"project", "_id":project}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("csv", {}).get("count",0) == 4

    # Session level
    r = as_admin.post('/download/summary', json=[{"level":"session", "_id":session}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("csv", {}).get("count",0) == 2

    # Two acquisitions requested together
    r = as_admin.post('/download/summary', json=[{"level":"acquisition", "_id":acquisition},{"level":"acquisition", "_id":acquisition2}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("csv", {}).get("count",0) == 2

    # 'group' is rejected as a summary level
    r = as_admin.post('/download/summary', json=[{"level":"group", "_id":missing_object_id}])
    assert r.status_code == 400

    r = as_admin.post('/sessions/' + session + '/analyses',  files=file_form(
        file_name, meta={'label': 'test', 'inputs':[{'name':file_name}]}))
    assert r.ok
    analysis = r.json()['_id']

    # Analysis level: the file is reported under the "tabular data" key
    r = as_admin.post('/download/summary', json=[{"level":"analysis", "_id":analysis}])
    assert r.ok
    summary = r.json()
    assert len(summary) == 1
    assert summary.get("tabular data", {}).get("count",0) == 1