Note
Go to the end to download the full example code
Xarray : Quick tour#
This example gives a very brief introduction to Xarray. We illustrate how to define a DataArray container, access its components, perform some basic operations, and slice / index it.
import numpy as np
import xarray as xr
import pandas as pd
Simulate data#
Let's start by creating a random spatio-temporal array.
# Size of the simulated dataset: 30 time points for each of the 7 ROIs
n_times, n_roi = 30, 7

# Coordinate vectors: evenly spaced time stamps between -1 and 1 (inclusive)
# and one string label per region of interest
times_vec = np.linspace(-1, 1, num=n_times)
roi_vec = np.array(["roi_{}".format(idx) for idx in range(n_roi)])

# Random samples arranged as (n_times, n_roi)
np_data = np.random.rand(n_times, n_roi)

print(np_data.shape)
print('*' * 79)
(30, 7)
*******************************************************************************
Xarray conversion and access to the internal components#
A DataArray is a container (like the well-known NumPy array), except that you can attach labels to each dimension. To this end, the input dims is a tuple that gives the dimension names, and coords gives the values along each of these dimensions.
# Wrap the NumPy array in a DataArray: `dims` names each axis and `coords`
# provides the values along those axes, in the same order
da_data = xr.DataArray(
    np_data,
    dims=('times', 'roi'),
    coords=(times_vec, roi_vec),
)
print(da_data.shape)
print(da_data)
print('*' * 79)

# The dimension names and the coordinate values can be read back from the
# container; a single coordinate is reachable as an attribute
print(f'Dimension names : {da_data.dims}')
print(f'Dimension values : {da_data.coords}')
print(f"Data of a specific dimension : {da_data.roi.data}")
print('*' * 79)

# The original NumPy array is available through `.data`
da_data.data

# The values of a coordinate can be overwritten; here the ROI names are
# replaced by names that repeat every three regions
repeated_names = [f"roi_{k % 3}" for k in range(n_roi)]
da_data['roi'] = np.array(repeated_names)
print(f"New ROI names : {da_data.roi.data}")
print('*' * 79)

# Reductions take the name of the dimension to collapse
da_data.min('times')  # minimum across time points
da_data.max('times')  # maximum across time points
da_data.mean('roi')  # mean across all ROI

# Similarly to Pandas, entries can be grouped along a dimension before being
# reduced: group the columns by ROI name, then average within each group
roi_groups = da_data.groupby('roi')
da_m = roi_groups.mean('roi')
print(da_m)
print('*' * 79)
(30, 7)
<xarray.DataArray (times: 30, roi: 7)>
array([[0.45250423, 0.81953048, 0.68086604, 0.13671123, 0.35727182,
0.69705263, 0.18550697],
[0.03348622, 0.60993492, 0.57175041, 0.44628991, 0.60917852,
0.32409775, 0.57827572],
[0.1745737 , 0.68033589, 0.3580109 , 0.76263188, 0.17549235,
0.29729328, 0.47052988],
[0.44484418, 0.15133033, 0.27449366, 0.60742285, 0.86630082,
0.54416071, 0.93129645],
[0.18373614, 0.5903104 , 0.35515531, 0.2989301 , 0.19236575,
0.63762854, 0.63306055],
[0.3281656 , 0.87692316, 0.84959453, 0.54985696, 0.83498028,
0.37108317, 0.20976327],
[0.47049288, 0.56452455, 0.10584779, 0.08250566, 0.72504829,
0.2737648 , 0.87159849],
[0.83923488, 0.59514662, 0.32759059, 0.2522432 , 0.38707588,
0.78638133, 0.02435129],
[0.52256746, 0.60323761, 0.54008351, 0.9237543 , 0.1594425 ,
0.19056107, 0.19126668],
[0.86825348, 0.78603006, 0.23996522, 0.64042716, 0.13678682,
0.8015636 , 0.88131854],
...
[0.27072135, 0.1256662 , 0.90822865, 0.81374921, 0.39051523,
0.81447342, 0.92334286],
[0.56047253, 0.17418467, 0.24712609, 0.8671212 , 0.56349887,
0.05852145, 0.52599213],
[0.50294822, 0.33203095, 0.45564797, 0.90327296, 0.44672216,
0.53741464, 0.13795948],
[0.56662134, 0.49131709, 0.89062454, 0.29382485, 0.26374601,
0.2917747 , 0.88206192],
[0.04601255, 0.73168949, 0.64634379, 0.80054755, 0.41159759,
0.99514001, 0.20837187],
[0.8347184 , 0.42530164, 0.217003 , 0.2869445 , 0.33682909,
0.38828474, 0.91538539],
[0.64718667, 0.49830466, 0.65288199, 0.21431652, 0.72321654,
0.68441242, 0.15672775],
[0.20195569, 0.30838732, 0.73105151, 0.26092498, 0.08883536,
0.13203418, 0.55180883],
[0.2297453 , 0.93047633, 0.57148398, 0.39547661, 0.81432978,
0.47026569, 0.26262482],
[0.16014147, 0.86258383, 0.02874299, 0.51285796, 0.85892025,
0.79302081, 0.14379835]])
Coordinates:
* times (times) float64 -1.0 -0.931 -0.8621 -0.7931 ... 0.8621 0.931 1.0
* roi (roi) <U5 'roi_0' 'roi_1' 'roi_2' 'roi_3' 'roi_4' 'roi_5' 'roi_6'
*******************************************************************************
Dimension names : ('times', 'roi')
Dimension values : Coordinates:
* times (times) float64 -1.0 -0.931 -0.8621 -0.7931 ... 0.8621 0.931 1.0
* roi (roi) <U5 'roi_0' 'roi_1' 'roi_2' 'roi_3' 'roi_4' 'roi_5' 'roi_6'
Data of a specific dimension : ['roi_0' 'roi_1' 'roi_2' 'roi_3' 'roi_4' 'roi_5' 'roi_6']
*******************************************************************************
New ROI names : ['roi_0' 'roi_1' 'roi_2' 'roi_0' 'roi_1' 'roi_2' 'roi_0']
*******************************************************************************
<xarray.DataArray (times: 30, roi: 3)>
array([[0.25824081, 0.58840115, 0.68895933],
[0.35268395, 0.60955672, 0.44792408],
[0.46924515, 0.42791412, 0.32765209],
[0.66118783, 0.50881557, 0.40932719],
[0.37190893, 0.39133807, 0.49639193],
[0.36259528, 0.85595172, 0.61033885],
[0.47486567, 0.64478642, 0.1898063 ],
[0.37194312, 0.49111125, 0.55698596],
[0.54586281, 0.38134005, 0.36532229],
[0.79666639, 0.46140844, 0.52076441],
[0.50557987, 0.62123993, 0.29911909],
[0.53282052, 0.28000556, 0.51688376],
[0.49236097, 0.20471139, 0.62999683],
[0.28847505, 0.40477803, 0.29917206],
[0.3940753 , 0.61786047, 0.81810043],
[0.42780297, 0.46607415, 0.13133179],
[0.69984301, 0.83090841, 0.65974832],
[0.49748786, 0.54342099, 0.4441187 ],
[0.58063406, 0.47225411, 0.12471474],
[0.05651046, 0.22247035, 0.49880481],
[0.66927114, 0.25809071, 0.86135104],
[0.65119529, 0.36884177, 0.15282377],
[0.51472689, 0.38937656, 0.4965313 ],
[0.58083604, 0.37753155, 0.59119962],
[0.35164399, 0.57164354, 0.8207419 ],
[0.6790161 , 0.38106536, 0.30264387],
[0.33941031, 0.6107606 , 0.6686472 ],
[0.33822983, 0.19861134, 0.43154284],
[0.29594891, 0.87240305, 0.52087484],
[0.27226593, 0.86075204, 0.4108819 ]])
Coordinates:
* times (times) float64 -1.0 -0.931 -0.8621 -0.7931 ... 0.8621 0.931 1.0
* roi (roi) object 'roi_0' 'roi_1' 'roi_2'
*******************************************************************************
Xarray slicing and indexing#
Now we show how to slice the container
# select the entries whose ROI label equals a given name (the labels are not
# unique at this point, so more than one column can be returned)
da_data.sel(roi='roi_0')

# select a time range (label-based slice along the 'times' coordinate)
da_time_slice = da_data.sel(times=slice(-.5, .5))
print(f"Temporal selection : {da_time_slice.coords}")
print('*' * 79)

# of course, spatio-temporal selection is also supported
da_st = da_data.sel(times=slice(-.5, .5), roi='roi_1')
print(f"Spatio-temporal selection : {da_st.coords}")
print('*' * 79)

# you can also slice according to integer positions
da_isel = da_data.isel(times=slice(10, 20))
print(f"Integer selection : {da_isel.coords}")
print('*' * 79)

# to keep several items based on their names, build a boolean mask with
# Pandas. `isin` compares every label to the requested names exactly; a regex
# built with `str.contains` would also keep partial matches (e.g. 'roi_1'
# inside 'roi_10') and would interpret special characters in the names
roi = da_data.roi.data
use_roi = ['roi_0', 'roi_2']
is_roi = pd.Series(roi).isin(use_roi)
da_mi = da_data.isel(roi=is_roi)
print(f"Multi-items selection : {da_mi.coords}")
Temporal selection : Coordinates:
* times (times) float64 -0.4483 -0.3793 -0.3103 ... 0.3103 0.3793 0.4483
* roi (roi) <U5 'roi_0' 'roi_1' 'roi_2' 'roi_0' 'roi_1' 'roi_2' 'roi_0'
*******************************************************************************
Spatio-temporal selection : Coordinates:
* times (times) float64 -0.4483 -0.3793 -0.3103 ... 0.3103 0.3793 0.4483
* roi (roi) <U5 'roi_1' 'roi_1'
*******************************************************************************
Integer selection : Coordinates:
* times (times) float64 -0.3103 -0.2414 -0.1724 ... 0.1724 0.2414 0.3103
* roi (roi) <U5 'roi_0' 'roi_1' 'roi_2' 'roi_0' 'roi_1' 'roi_2' 'roi_0'
*******************************************************************************
Multi-items selection : Coordinates:
* times (times) float64 -1.0 -0.931 -0.8621 -0.7931 ... 0.8621 0.931 1.0
* roi (roi) <U5 'roi_0' 'roi_2' 'roi_0' 'roi_2' 'roi_0'
Xarray attributes#
One of the nice features of DataArray is that it supports setting attributes. Therefore, you can add, for example, the parameters that describe your analysis.
# Metadata is stored in the `attrs` dictionary of the DataArray; the entries
# below are added in a single call
da_data.attrs.update({
    # a few string attributes
    'inference': 'ffx',
    'stats': 'cluster-based',
    'description': """Here's a small description of the analysis
I'm currently running. Trying to find a difference between condition 1. vs 2.
""",
    # vectors (but not arrays) can also be added to the attributes
    'vector': np.arange(30),
    # "None" seems to pose a problem when saving the results, so it is
    # stored as a string instead
    'none_problem': str(None),
})
print(da_data)
<xarray.DataArray (times: 30, roi: 7)>
array([[0.45250423, 0.81953048, 0.68086604, 0.13671123, 0.35727182,
0.69705263, 0.18550697],
[0.03348622, 0.60993492, 0.57175041, 0.44628991, 0.60917852,
0.32409775, 0.57827572],
[0.1745737 , 0.68033589, 0.3580109 , 0.76263188, 0.17549235,
0.29729328, 0.47052988],
[0.44484418, 0.15133033, 0.27449366, 0.60742285, 0.86630082,
0.54416071, 0.93129645],
[0.18373614, 0.5903104 , 0.35515531, 0.2989301 , 0.19236575,
0.63762854, 0.63306055],
[0.3281656 , 0.87692316, 0.84959453, 0.54985696, 0.83498028,
0.37108317, 0.20976327],
[0.47049288, 0.56452455, 0.10584779, 0.08250566, 0.72504829,
0.2737648 , 0.87159849],
[0.83923488, 0.59514662, 0.32759059, 0.2522432 , 0.38707588,
0.78638133, 0.02435129],
[0.52256746, 0.60323761, 0.54008351, 0.9237543 , 0.1594425 ,
0.19056107, 0.19126668],
[0.86825348, 0.78603006, 0.23996522, 0.64042716, 0.13678682,
0.8015636 , 0.88131854],
...
[0.27072135, 0.1256662 , 0.90822865, 0.81374921, 0.39051523,
0.81447342, 0.92334286],
[0.56047253, 0.17418467, 0.24712609, 0.8671212 , 0.56349887,
0.05852145, 0.52599213],
[0.50294822, 0.33203095, 0.45564797, 0.90327296, 0.44672216,
0.53741464, 0.13795948],
[0.56662134, 0.49131709, 0.89062454, 0.29382485, 0.26374601,
0.2917747 , 0.88206192],
[0.04601255, 0.73168949, 0.64634379, 0.80054755, 0.41159759,
0.99514001, 0.20837187],
[0.8347184 , 0.42530164, 0.217003 , 0.2869445 , 0.33682909,
0.38828474, 0.91538539],
[0.64718667, 0.49830466, 0.65288199, 0.21431652, 0.72321654,
0.68441242, 0.15672775],
[0.20195569, 0.30838732, 0.73105151, 0.26092498, 0.08883536,
0.13203418, 0.55180883],
[0.2297453 , 0.93047633, 0.57148398, 0.39547661, 0.81432978,
0.47026569, 0.26262482],
[0.16014147, 0.86258383, 0.02874299, 0.51285796, 0.85892025,
0.79302081, 0.14379835]])
Coordinates:
* times (times) float64 -1.0 -0.931 -0.8621 -0.7931 ... 0.8621 0.931 1.0
* roi (roi) <U5 'roi_0' 'roi_1' 'roi_2' 'roi_0' 'roi_1' 'roi_2' 'roi_0'
Attributes:
inference: ffx
stats: cluster-based
description: Here's a small description of the analysis\nI'm currently ...
vector: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ...
none_problem: None
Xarray to another format#
Finally, we quickly illustrate how to convert a DataArray into, for example, a pandas.DataFrame
# Convert the DataArray to its pandas counterpart, then display it
df_data = da_data.to_pandas()
print(df_data)
roi roi_0 roi_1 roi_2 roi_0 roi_1 roi_2 roi_0
times
-1.000000 0.452504 0.819530 0.680866 0.136711 0.357272 0.697053 0.185507
-0.931034 0.033486 0.609935 0.571750 0.446290 0.609179 0.324098 0.578276
-0.862069 0.174574 0.680336 0.358011 0.762632 0.175492 0.297293 0.470530
-0.793103 0.444844 0.151330 0.274494 0.607423 0.866301 0.544161 0.931296
-0.724138 0.183736 0.590310 0.355155 0.298930 0.192366 0.637629 0.633061
-0.655172 0.328166 0.876923 0.849595 0.549857 0.834980 0.371083 0.209763
-0.586207 0.470493 0.564525 0.105848 0.082506 0.725048 0.273765 0.871598
-0.517241 0.839235 0.595147 0.327591 0.252243 0.387076 0.786381 0.024351
-0.448276 0.522567 0.603238 0.540084 0.923754 0.159443 0.190561 0.191267
-0.379310 0.868253 0.786030 0.239965 0.640427 0.136787 0.801564 0.881319
-0.310345 0.633122 0.709265 0.483099 0.189008 0.533214 0.115139 0.694610
-0.241379 0.452350 0.407700 0.064877 0.603115 0.152312 0.968891 0.542996
-0.172414 0.013112 0.392413 0.539875 0.500718 0.017010 0.720119 0.963253
-0.103448 0.394916 0.807859 0.541632 0.097280 0.001697 0.056712 0.373229
-0.034483 0.195498 0.896269 0.898990 0.946453 0.339452 0.737210 0.040275
0.034483 0.570378 0.760258 0.172941 0.274468 0.171891 0.089723 0.438563
0.103448 0.749470 0.846777 0.893988 0.583789 0.815039 0.425509 0.766269
0.172414 0.818823 0.458062 0.081947 0.511368 0.628780 0.806290 0.162272
0.241379 0.607081 0.859874 0.101915 0.802825 0.084635 0.147515 0.331996
0.310345 0.076329 0.399032 0.598178 0.049868 0.045909 0.399431 0.043335
0.379310 0.270721 0.125666 0.908229 0.813749 0.390515 0.814473 0.923343
0.448276 0.560473 0.174185 0.247126 0.867121 0.563499 0.058521 0.525992
0.517241 0.502948 0.332031 0.455648 0.903273 0.446722 0.537415 0.137959
0.586207 0.566621 0.491317 0.890625 0.293825 0.263746 0.291775 0.882062
0.655172 0.046013 0.731689 0.646344 0.800548 0.411598 0.995140 0.208372
0.724138 0.834718 0.425302 0.217003 0.286944 0.336829 0.388285 0.915385
0.793103 0.647187 0.498305 0.652882 0.214317 0.723217 0.684412 0.156728
0.862069 0.201956 0.308387 0.731052 0.260925 0.088835 0.132034 0.551809
0.931034 0.229745 0.930476 0.571484 0.395477 0.814330 0.470266 0.262625
1.000000 0.160141 0.862584 0.028743 0.512858 0.858920 0.793021 0.143798
Total running time of the script: (0 minutes 1.301 seconds)
Estimated memory usage: 9 MB