Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions harmonica/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from ._forward.tesseroid import tesseroid_gravity
from ._forward.tesseroid_layer import DatasetAccessorTesseroidLayer, tesseroid_layer
from ._gravity_corrections import bouguer_correction
from ._io.geosoft_gxf_io import read_gxf
from ._io.icgem_gdf import load_icgem_gdf
from ._io.oasis_montaj_grd import load_oasis_montaj_grid
from ._isostasy import isostatic_moho_airy
Expand Down
348 changes: 348 additions & 0 deletions harmonica/_io/geosoft_gxf_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,348 @@
# Function to read in GXF files, provided by USGS Generally
# No guarantee this will read in your specific GXF files

# Written by Thomas Martin, Unidata
# Inspired by this gist: https://gist.github.com/jobar8/683483df605a906fb3da747b64627305
# created by Joseph Barraud and made available under the BSD License.

import numpy as np
import xarray as xr
from typing import Tuple, List, Dict, Any

"""
Functions for reading Geosoft GXF (Grid eXchange File) format files.

The GXF format is an ASCII file format for gridded data developed by Geosoft.
For detailed format specification, see:
https://pubs.usgs.gov/of/1999/of99-514/grids/gxf.pdf

Test data provided by USGS:
Kucks, R.P., and Hill, P.L., 2000, Wyoming aeromagnetic and gravity maps and data—
A web site for distribution of data: U.S. Geological Survey Open-File Report 00-0198,
https://pubs.usgs.gov/of/2000/ofr-00-0198/html/wyoming.htm

GXF (Grid eXchange File) is a standard ASCII file format for
exchanging gridded data among different software systems.
Software that supports the GXF standard will be able to import
properly formatted GXF files and export grids in GXF format.

Grid Description:
A grid is a rectangular array of points at which single data
values define a two dimensional function. Grid point locations
are related to a Grid Coordinate System (GCS), which is a right
handed Cartesian system with X and Y axis defined by the bottom
and left sides of a grid array. The grid point at the bottom,
left corner of the array is the origin of the GCS. All distances
are in meters.

GCS coordinates are related to a Base Coordinate System (BCS)
through a plane translation and rotation. The origin of the GCS
is located at point (x0,y0) in the BCS, and the X and Y grid
indices are related to BCS units through the separation between
points in the GCS X and Y directions.

Labeled Data Objects and Comments

A GXF file is an ASCII file made up of a number of labeled data
objects and comments. Each labeled data object has a label line
followed by one or more data lines. A label line is identified
by a '#' character in the first column followed immediately by an
upper-case label. The data associated with that label are found
on one or more lines that follow the label.

Lines

All lines in a GXF file must be less than or equal to 80
characters in length. Any lines that are not part of a labeled
data object are ignored and can be used to place comments within
a GXF file. Programs that read GXF files will skip such comment
lines while they search for the next GXF data object.

GXF Object Definitions

#TITLE
A one line descriptive title of the grid. Some grid formats
include textual descriptions of the grid, and this information
can be placed in a #TITLE object.
Default: blank title

#POINTS
The number of points in each grid row (horizontal or vertical as
defined by the #SENSE object).
Default: no default - this object is required.

#ROWS
The number of rows in the grid. A grid row (or vector) is a
collection of consecutive grid points that represent the grid
values along a horizontal or vertical line in the grid. The
complete grid is then defined by a consecutive sequence of grid
rows.
Default: no default - this object is required.

#PTSEPARATION
The separation between points in the grid. This should be in
Base Coordinate System units (ground units for geographically
based grids).
Default: 1.0

#RWSEPARATION
The separation between rows in the grid. These should be in Base
Coordinate System units (ground units for geographically based
grids).
Default: 1.0

#XORIGIN
The X location of the bottom left corner of the grid in the Base
Coordinate System.
Default: 0.0

#YORIGIN
The Y location of the bottom left corner of the grid in the Base
Coordinate System.
Default: 0.0

#ROTATION
The rotation angle of the grid. This is the counter-clockwise
angle of the bottom edge of the grid with respect to the Base
Coordinate System X axis. Rotation only has meaning for Base
Coordinate Systems that use the same units on the X and Y axis.
Default: 0.0

#SENSE
The first point of the first row of the stored grid can be at any
corner of the grid rectangle, and the grid rows can be run
vertically or horizontally. The SENSE object defines this storage
sense as follows:
ą1 first point at bottom left of grid
ą2 first point at upper left of grid
ą3 first point at upper right of grid
ą4 first point at bottom right of grid
A positive SENSE stores rows in a right-handed sense; a negative
SENSE stores rows in a left-handed sense. This means that if you
were standing at the first grid point and looking into the grid,
the first grid row would extend to your right for a right handed
grid (positive sense), or to your left for a left handed sense
(left-handed grid): (All grids on this CD have SENSE=+1.)
Default: 1 (first point at bottom left, rows left to
right)

#TRANSFORM
This keyword is followed by two numbers on the same line: SCALE
and OFFSET, which are used to transform the grid data to desired
units:
Z = G * SCALE + OFFSET
where
Z grid value in the desired unit
G are grid values as specified in the #GRID object
Default: SCALE = 1.0, OFFSET = 0.0

#DUMMY
The grid must be rectangular (every row must have the same number
of points). The dummy value defined by this object is used to
define blank areas of the grid. Any grids that include blank
areas must define a dummy value.
Default: no dummy value.

#GRID
The grid data is listed point by point and row by row. The #GRID
object and data is always the last object in a GXF file. The
first data point is at the location indicated by #SENSE, and is
followed by successive points in that row of points (either
horizontal or vertical), then the points in the next row, and so
on. The points in a row can follow on to the next data line,
although each new row must start on a new data line. A GXF
reading program can expect #ROWS of #POINTS for a total of #ROWS
times #POINTS data values.
Default: none, must be included as the last object in a
GXF file.

Data for testing is also from the USGS:

Kucks, R.P., and Hill, P.L., 2000, Wyoming aeromagnetic and gravity maps and data—A web site for distribution of data: U.S. Geological Survey Open-File Report 00-0198, https://pubs.usgs.gov/of/2000/ofr-00-0198/html/wyoming.htm

"""

"""
Function to read USGS GXF file
"""


def _read_gxf_data(infile: str) -> Tuple[np.ndarray, Dict[str, Any]]:
"""
Read a GXF file and return the grid data and metadata.
Following official GXF specifications from Geosoft.

Parameters:
infile (str): Path to the GXF file

Returns:
tuple: (grid_array, metadata)
- grid_array: numpy array containing the properly oriented grid
- metadata: dictionary containing all GXF parameters
"""
# Parse GXF file line-by-line to avoid loading entire file into memory
# This is more memory efficient for large grid files
headers: Dict[str, str] = {}
data_lines: List[str] = []
reading_data = False # Flag to track when we've reached the #GRID section
pending_header_key = None # Store header key waiting for its value on next line

# Process file line by line rather than loading all lines at once
# This approach minimizes memory usage for large GXF files
with open(infile, "r") as f:
for line in f:
line = line.rstrip('\n\r') # Remove line endings

# Skip empty lines throughout the file
if not line:
continue

if reading_data:
# We're in the data section after #GRID - collect all data lines
data_lines.append(line)
elif line.startswith('#'):
# This is a header line - extract the key (remove '#' prefix)
key = line[1:]
if key == 'GRID':
# Special case: #GRID marks start of data section
reading_data = True
else:
# Regular header - store key and wait for value on next line
pending_header_key = key
elif pending_header_key:
# This line contains the value for the previously found header key
# GXF format: header key on one line, value on the next non-empty line
headers[pending_header_key] = line
pending_header_key = None # Clear the pending key

# Parse metadata with proper type conversion
metadata: Dict[str, Any] = {}
for key, value in headers.items():
try:
# Try converting to float first (handles scientific notation)
metadata[key] = float(value)
# If it's a whole number, convert to int
if metadata[key].is_integer():
metadata[key] = int(metadata[key])
except ValueError:
# If conversion fails, keep as string
metadata[key] = value.strip()

# Process the collected data lines into a 1D numpy array
# GXF data may have inconsistent columns per line, so we parse manually
data_values = []
for line in data_lines:
# Split each data line into individual values and convert to float
# GXF format allows multiple values per line with variable spacing
values = line.split()
data_values.extend([float(val) for val in values])

# Convert to numpy array - this is now our complete 1D data array
data_1d = np.array(data_values)

# Get grid dimensions from already-parsed metadata
nrows = metadata['ROWS']
ncols = metadata['POINTS']

# Reshape to 2D array
grid_array = data_1d.reshape((nrows, ncols))

# Handle dummy values
if 'DUMMY' in metadata:
dummy_value = metadata['DUMMY']
grid_array[grid_array == dummy_value] = np.nan

# Handle grid orientation based on SENSE parameter
sense = str(metadata.get('SENSE', 1)) # Default to 1 if not specified
if sense == '1': # First point at bottom left of grid
grid_array = np.flipud(grid_array)

# Add convenient grid parameters
metadata.update({
'nx': ncols,
'ny': nrows,
'x_inc': metadata.get('PTSEPARATION', metadata.get('XSEP', 1.0)),
'y_inc': metadata.get('RWSEPARATION', metadata.get('YSEP', 1.0)),
'x_min': metadata.get('XORIGIN', 0.0),
'y_min': metadata.get('YORIGIN', 0.0)
})

# Add projection information if available
if 'PRJTYPE' in metadata:
metadata['projection'] = {
'type': metadata['PRJTYPE'],
'units': metadata.get('PRJUNIT', 'unknown'),
'parameters': {
'semi_major_axis': metadata.get('A_AXIS_RADIUS'),
'semi_minor_axis': metadata.get('B_AXIS_RADIUS'),
'reference_longitude': metadata.get('RFLONGITUDE'),
'reference_latitude': metadata.get('RFLATITUDE'),
'first_standard_parallel': metadata.get('FIRST_STANDARD_PARALLEL'),
'second_standard_parallel': metadata.get('SECOND_STANDARD_PARALLEL'),
'false_easting': metadata.get('FLSEASTING'),
'false_northing': metadata.get('FLSNORTHING')
}
}

return grid_array, metadata


def read_gxf(infile: str) -> xr.DataArray:
"""
Read a GXF file and convert it to an xarray DataArray with proper coordinates.

The GXF format is an ASCII file format for gridded data developed by
Geosoft. This function reads the header information and grid data,
returning it as an xarray.DataArray for convenient analysis.

Parameters
----------
infile : str
Path to the GXF file

Returns
-------
grid : xarray.DataArray
An xarray.DataArray containing the grid data with appropriate
coordinates and metadata from the GXF header stored in attributes.
"""
# Read the GXF file using existing function
grid_array, metadata = _read_gxf_data(infile)

# Create coordinate arrays
x_coords = np.arange(metadata['nx']) * metadata['x_inc'] + metadata['x_min']
y_coords = np.arange(metadata['ny']) * metadata['y_inc'] + metadata['y_min']

# Determine coordinate names based on rotation
rotation = float(metadata.get('ROTATION', 0.0))
if rotation == 0.0:
x_name, y_name = 'easting', 'northing'
else:
x_name, y_name = 'x', 'y'

# Create DataArray with coordinates
coords = {x_name: x_coords, y_name: y_coords}
dims = [y_name, x_name]

da = xr.DataArray(
data=grid_array,
dims=dims,
coords=coords,
name=metadata.get('TITLE', 'GXF_Grid')
)

# Add all metadata as attributes
attrs = metadata.copy()

# If projection exists, flatten its nested structure for DataArray attributes
if 'projection' in attrs:
proj_info = attrs.pop('projection')
attrs['projection_type'] = proj_info['type']
attrs['projection_units'] = proj_info['units']
# Add non-None projection parameters with a proj_ prefix
for key, value in proj_info['parameters'].items():
if value is not None:
attrs[f'proj_{key}'] = value

da.attrs = attrs
return da
Loading