{
 "metadata": {
  "name": "datacontrole"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Data control: expected vs. observed datasets\n",
      "\n",
      "Compares the `(case, dataset)` pairs listed in the `netherlands` sheet of `xlfile` with the `<case>/<dataset>` directories found two levels below `ecodir`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "\n",
      "import numpy as np\n",
      "import xlrd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Spreadsheet that lists the expected datasets (first column, one 'case - dataset' per row).\n",
      "xlfile = 'datasets_BwN.xls'\n",
      "# Root of the data tree. Set the ECOSHAPE_DIR environment variable to point\n",
      "# somewhere else without editing the notebook.\n",
      "ecodir = os.environ.get('ECOSHAPE_DIR', '/Volumes/trunk/ecoshape')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Which datasets do we expect?"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "book = xlrd.open_workbook(xlfile)\n",
      "sheet = book.sheet_by_name('netherlands')\n",
      "# Guard against a changed sheet layout: the first column must be the data-type column.\n",
      "assert sheet.cell_value(0, 0) == u'Type of data:'\n",
      "datatypescolumn = sheet.col(0)[1:]  # skip the header cell\n",
      "expected_datatypes = set()\n",
      "for x in datatypescolumn:\n",
      "    if x.value:  # ignore empty cells\n",
      "        # Split on the first hyphen only, so a dataset name that itself\n",
      "        # contains '-' does not break the two-way unpacking.\n",
      "        case, dataset = x.value.split('-', 1)\n",
      "        expected_datatypes.add((case.strip(), dataset.strip()))\n",
      "\n",
      "print(expected_datatypes)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "set([(u'MIJ3.3', u'Grainsizes'), (u'ZWD', u'Bottom animals'), (u'AMS3/HK2.3', u'Benthos'), (u'HK3.8', u'Fish'), (u'ZWD', u'Oysterreefs'), (u'MIJ3.3', u'Bathymetry'), (u'SandEngine', u'Fish'), (u'ZWD', u'Birds'), (u'ZWD', u'Oysterdams'), (u'AMS0.3', u'Birds'), (u'ZWD', u'Wind'), (u'ZWD', u'Velocities'), (u'ZWD', u'Suspended matter'), (u'ZWD', u'Sediment'), (u'ZWD', u'Water levels'), (u'ZWD', u'Waves'), (u'AMS3/HK2.3', u'Fish'), (u'ZWD', u'Bathymetry')])\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Which datasets do we observe?\n",
      "\n",
      "Every directory exactly two levels below `ecodir` is recorded as a `(case, dataset)` pair."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "if not os.path.isdir(ecodir):\n",
      "    # os.walk silently yields nothing for a missing root; fail loudly instead.\n",
      "    raise OSError('data directory not found: %s' % ecodir)\n",
      "\n",
      "observed_datatypes = set()\n",
      "for dirname, dirs, filenames in os.walk(ecodir):\n",
      "    # Measure depth relative to ecodir so the check does not depend on how\n",
      "    # many path separators ecodir itself contains.\n",
      "    relparts = os.path.relpath(dirname, ecodir).split(os.path.sep)\n",
      "    if len(relparts) != 2:\n",
      "        continue\n",
      "    case, dataset = relparts\n",
      "    observed_datatypes.add((case.strip(), dataset.strip()))\n",
      "    # Nothing below <case>/<dataset> is needed: prune the walk in place.\n",
      "    del dirs[:]\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Compare expected and observed\n",
      "\n",
      "In the recorded run below the intersection is empty: none of the observed directory pairs matches a spreadsheet entry.\n",
      "\n",
      "**NOTE(review):** the expected case `AMS3/HK2.3` contains a `/`, so it cannot equal a single directory name on a POSIX file system. Decide how such combined cases should be matched."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print(\"Unexpected observed\")\n",
      "print(observed_datatypes - expected_datatypes)\n",
      "print(\"Expected but not observed\")\n",
      "print(expected_datatypes - observed_datatypes)\n",
      "print(\"Expected and observed\")\n",
      "print(expected_datatypes & observed_datatypes)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Unexpected observed\n",
        "set([('HK3.2', '2011'), ('HK3.2', 'wavetransformation'), ('HK3.2', '2010'), ('MIJ3.3', 'Workumerwaard'), ('HK3.2', '2009baseline')])\n",
        "Expected but not observed\n",
        "set([(u'MIJ3.3', u'Grainsizes'), (u'ZWD', u'Bottom animals'), (u'ZWD', u'Sediment'), (u'AMS3/HK2.3', u'Fish'), (u'ZWD', u'Oysterreefs'), (u'MIJ3.3', u'Bathymetry'), (u'SandEngine', u'Fish'), (u'ZWD', u'Birds'), (u'ZWD', u'Oysterdams'), (u'AMS0.3', u'Birds'), (u'ZWD', u'Wind'), (u'ZWD', u'Velocities'), (u'ZWD', u'Water levels'), (u'AMS3/HK2.3', u'Benthos'), (u'ZWD', u'Suspended matter'), (u'ZWD', u'Waves'), (u'HK3.8', u'Fish'), (u'ZWD', u'Bathymetry')])\n",
        "Expected and observed\n",
        "set([])\n"
       ]
      }
     ],
     "prompt_number": 5
    }
   ],
   "metadata": {}
  }
 ]
}