2 changes: 1 addition & 1 deletion package.json
@@ -46,7 +46,7 @@
"validator": "^13.15.20",
"vega": "^6.2.0",
"vega-embed": "^6.21.0",
"vega-lite": "^5.5.0",
"vega-lite": "6.4.1",
"vm-browserify": "^1.1.2"
},
"scripts": {
13 changes: 9 additions & 4 deletions py-src/data_formulator/agents/agent_py_data_rec.py
@@ -33,7 +33,7 @@
"display_instruction": "..." // string, the even shorter verb phrase describing the users' goal.
"recommendation": "..." // string, explain why this recommendation is made
"output_fields": [...] // string[], describe the desired output fields that the output data should have (i.e., the goal of transformed data), it's a good idea to preseve intermediate fields here
"chart_type": "" // string, one of "point", "bar", "line", "area", "heatmap", "group_bar". "chart_type" should either be inferred from user instruction, or recommend if the user didn't specify any.
"chart_type": "" // string, one of "point", "bar", "line", "area", "heatmap", "group_bar", 'boxplot'. "chart_type" should either be inferred from user instruction, or recommend if the user didn't specify any.
"chart_encodings": {
"x": "",
"y": "",
@@ -65,7 +65,7 @@
- if you mention column names from the input or the output data, highlight the text in **bold**.
* the column can either be a column in the input data, or a new column that will be computed in the output data.
* the mention doesn't have to be an exact match, it can be a semantic match, e.g., if you mentioned "average score" in the text while the column to be computed is "Avg_Score", you should still highlight "**average score**" in the text.
- "chart_type" must be one of "point", "bar", "line", "area", "heatmap", "group_bar"
- "chart_type" must be one of "point", "bar", "line", "area", "heatmap", "group_bar", "boxplot"
- "chart_encodings" should specify which fields should be used to create the visualization
- decide which visual channels should be used to create the visualization appropriate for the chart type.
- point: x, y, color, size, facet
@@ -75,6 +75,7 @@
- area: x, y, color, facet
- heatmap: x, y, color, facet
- group_bar: x, y, color, facet
+ - boxplot: x, y, color, facet
- note that all fields used in "chart_encodings" should be included in "output_fields".
- all fields you need for visualizations should be transformed into the output fields!
- "output_fields" should include important intermediate fields that are not used in visualization but are used for data transformation.
@@ -108,6 +109,10 @@
- best for: Trends over time, continuous data
- (heatmap) Heatmaps: x,y: Categorical (you need to convert quantitative to nominal), color: Quantitative intensity,
- best for: Pattern discovery in matrix data
+ - (boxplot) Box plots: x: Categorical (nominal/ordinal), y: Quantitative, color: Categorical (optional for creating grouped boxplots),
+ - best for: Distribution of a quantitative field
+ - use x values directly if x values are categorical, and transform the data into bins if the field values are quantitative.
+ - when color is specified, the boxplot will be grouped automatically (items with the same x values will be grouped).
- facet channel is available for all chart types, it supports a categorical field with small cardinality to visualize the data in different facets.
- if you really need additional legend fields:
- you can use opacity for legend (supports Quantitative and Categorical).
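
As a concrete sketch of the boxplot binning guidance above (data and column names hypothetical), a quantitative field can be bucketed into categories with pandas before serving as the x channel:

import pandas as pd

# hypothetical data: "age" is quantitative, so it must be binned before
# it can serve as the categorical x channel of a boxplot
df = pd.DataFrame({"age": [12, 25, 41, 58, 70], "score": [3.1, 4.5, 2.2, 5.0, 3.8]})
df["age_bin"] = pd.cut(df["age"], bins=[0, 18, 35, 50, 65, 100],
                       labels=["0-18", "19-35", "36-50", "51-65", "66+"])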
@@ -135,7 +140,7 @@

2. Then, write a python function based on the inferred goal, the function input is a dataframe "df" (or multiple dataframes based on tables presented in the [CONTEXT] section) and the output is the transformed dataframe "transformed_df".
"transformed_df" should contain all "output_fields" from the refined user intent in the json object.
- The python function must follow the template provided in [TEMPLATE], do not import any other libraries or modify function name. The function should be as simple as possible and easily readable.
+ The python function must follow the template provided in [TEMPLATE]. The function should be as simple as possible and easily readable.
If there is no data transformation needed based on "output_fields", the transformation function can simply "return df".

[TEMPLATE]
Expand All @@ -144,7 +149,7 @@
import pandas as pd
import collections
import numpy as np
- from sklearn import ... # import necessary libraries from sklearn if needed
+ # from sklearn import ... # import from sklearn if you need it.

def transform_data(df1, df2, ...):
# complete the template here
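
As a hedged example, a completed template for a single input table might look like the following; the grouping and column names (region, score) are made up for illustration:

import pandas as pd

def transform_data(df1):
    # hypothetical goal: average score per region, keeping the row count
    # as an intermediate field that can appear in "output_fields"
    transformed_df = (
        df1.groupby("region", as_index=False)
           .agg(Avg_Score=("score", "mean"), Count=("score", "size"))
    )
    return transformed_df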
2 changes: 1 addition & 1 deletion py-src/data_formulator/agents/agent_py_data_transform.py
@@ -89,7 +89,7 @@
import pandas as pd
import collections
import numpy as np
- from sklearn import ... # import necessary libraries from sklearn if needed
+ # from sklearn import ... # import from sklearn if you need it.

def transform_data(df1, df2, ...):
# complete the template here
82 changes: 82 additions & 0 deletions py-src/data_formulator/tables_routes.py
@@ -841,4 +841,86 @@
return jsonify({
"status": "error",
"message": safe_msg
}), status_code


@tables_bp.route('/refresh-derived-data', methods=['POST'])
def refresh_derived_data():
"""
Re-run Python transformation code with new input data to refresh a derived table.
This endpoint takes:
- input_tables: list of {name: string, rows: list} objects representing the parent tables
- code: the Python transformation code to execute
Returns:
- status: 'ok' or 'error'
- rows: the resulting rows if successful
- message: error message if failed
"""
try:
from data_formulator.py_sandbox import run_transform_in_sandbox2020
from flask import current_app

data = request.get_json()
input_tables = data.get('input_tables', [])
code = data.get('code', '')

if not input_tables:
return jsonify({
"status": "error",
"message": "No input tables provided"
}), 400

if not code:
return jsonify({
"status": "error",
"message": "No transformation code provided"
}), 400

# Convert input tables to pandas DataFrames
df_list = []
for table in input_tables:
table_name = table.get('name', '')
table_rows = table.get('rows', [])

if not table_rows:
return jsonify({
"status": "error",
"message": f"Table '{table_name}' has no rows"
}), 400

df = pd.DataFrame.from_records(table_rows)
df_list.append(df)

# Get exec_python_in_subprocess setting from app config
exec_python_in_subprocess = current_app.config.get('CLI_ARGS', {}).get('exec_python_in_subprocess', False)

# Run the transformation code
result = run_transform_in_sandbox2020(code, df_list, exec_python_in_subprocess)

if result['status'] == 'ok':
result_df = result['content']

# Convert result DataFrame to list of records
rows = json.loads(result_df.to_json(orient='records', date_format='iso'))

return jsonify({
"status": "ok",
"rows": rows,
"message": "Successfully refreshed derived data"
})
else:
return jsonify({
"status": "error",
"message": result.get('content', 'Unknown error during transformation')
}), 400
Comment on lines +914 to +917

Check warning (Code scanning / CodeQL): Information exposure through an exception (Medium)
Stack trace information flows to this location and may be exposed to an external user.

Copilot Autofix (AI, 5 days ago):
General approach: Avoid returning raw exception details from the sandbox directly to the client. Instead, log detailed error information on the server, and return a generic but user-friendly error message over the API. If some feedback is needed for user-authored transformation code, keep it high-level and ensure it cannot contain stack traces or sensitive environment details.

Best fix in this context:

  1. In py_sandbox.run_in_main_process:

    • Instead of returning the raw error_message derived from the exception, log the full details (including traceback) server-side using traceback.format_exc().
    • Return a generic error indicator and a sanitized message like "An error occurred while executing the transformation code.", or at most a brief classification (e.g., "Import not allowed"), not including arbitrary str(err).
  2. In py_sandbox.run_transform_in_sandbox2020:

    • Keep the structure the same (returning {'status': 'error', 'content': ...}), but ensure that the content comes from the sanitized error message produced by run_in_main_process (as above). No changes needed if we change only what run_in_main_process returns.
  3. In tables_routes.refresh_derived_data:

    • Continue to return result.get('content', 'Unknown error during transformation') as the message, but because content will now be sanitized, it will no longer contain sensitive exception details.
    • Optionally, slightly reword the message to be generic, but this is not strictly necessary once content itself is safe.

Concretely, we will:

  • Modify the except block in run_in_main_process (lines around 106–111 in py-src/data_formulator/py_sandbox.py) to:

    • Build a safe, fixed error message for returning to callers.
    • Capture the detailed traceback using traceback.format_exc() and return it only in a field not propagated to the HTTP response (or not at all—just log it).
    • To stay minimally invasive and not change the structure used by callers, we will keep the keys 'status' and 'error_message', but ensure error_message is sanitized and does not embed arbitrary str(err).
  • Optionally, to preserve server-side diagnostics without changing imports, we can log the detailed traceback inside run_in_main_process using print or warnings, but a better approach (if a logger were available) would be logging. Since we must not add new imports beyond well-known ones and no logger is defined in this file, we will just avoid returning the sensitive details and, if desired, include a comment encouraging logging via outer layers.

No changes are required in tables_routes.refresh_derived_data beyond relying on the now-sanitized message, since the vulnerability is the content coming from the sandbox, not the shape of the JSON response.


Suggested changeset 1: py-src/data_formulator/py_sandbox.py (outside the changed files)

Autofix patch. Run the following command in your local git repository to apply it:

cat << 'EOF' | git apply
diff --git a/py-src/data_formulator/py_sandbox.py b/py-src/data_formulator/py_sandbox.py
--- a/py-src/data_formulator/py_sandbox.py
+++ b/py-src/data_formulator/py_sandbox.py
@@ -106,8 +106,13 @@
     try:
         exec(code, restricted_globals)
     except Exception as err:
-        error_message = f"Error: {type(err).__name__} - {str(err)}"
-        return {'status': 'error', 'error_message': error_message}
+        # Do not propagate detailed exception information (which may include
+        # stack traces, file paths, or other sensitive data) to the caller.
+        #
+        # Instead, return a generic error message, while allowing callers or
+        # outer layers to log detailed information if needed.
+        safe_error_message = "An error occurred while executing the transformation code."
+        return {'status': 'error', 'error_message': safe_error_message}
 
     return {'status': 'ok', 'allowed_objects': {key: restricted_globals[key] for key in allowed_objects}}
 
EOF
except Exception as e:
logger.error(f"Error refreshing derived data: {str(e)}")
logger.error(traceback.format_exc())
safe_msg, status_code = sanitize_db_error_message(e)
return jsonify({
"status": "error",
"message": safe_msg
}), status_code
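
For reference, a client could exercise the new endpoint roughly as follows; the host, port, and URL prefix are assumptions, since they depend on how tables_bp is registered:

import requests

# hypothetical base URL; adjust to wherever tables_bp is mounted
resp = requests.post(
    "http://localhost:5000/api/tables/refresh-derived-data",
    json={
        "input_tables": [{"name": "df1", "rows": [{"a": 1}, {"a": 2}]}],
        "code": "import pandas as pd\ndef transform_data(df1):\n    return df1",
    },
)
print(resp.json())  # expected: {"status": "ok", "rows": [...]} on success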
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "data_formulator"
version = "0.5.1"
version = "0.5.1.1"

requires-python = ">=3.9"
authors = [
84 changes: 9 additions & 75 deletions src/app/App.tsx
@@ -64,16 +64,12 @@ import { DictTable } from '../components/ComponentType';
import { AppDispatch } from './store';
import dfLogo from '../assets/df-logo.png';
import { ModelSelectionButton } from '../views/ModelSelectionDialog';
import { TableCopyDialogV2 } from '../views/TableSelectionView';
import { TableUploadDialog } from '../views/TableSelectionView';
import KeyboardArrowDownIcon from '@mui/icons-material/KeyboardArrowDown';
import ContentPasteIcon from '@mui/icons-material/ContentPaste';
import UploadFileIcon from '@mui/icons-material/UploadFile';
import DownloadIcon from '@mui/icons-material/Download';
import { DBTableSelectionDialog, handleDBDownload } from '../views/DBTableManager';
import CloudQueueIcon from '@mui/icons-material/CloudQueue';
import { handleDBDownload } from '../views/DBTableManager';
import { getUrls } from './utils';
import { DataLoadingChatDialog } from '../views/DataLoadingChat';
import { UnifiedDataUploadDialog } from '../views/UnifiedDataUploadDialog';
import ChatIcon from '@mui/icons-material/Chat';
import { AgentRulesDialog } from '../views/AgentRulesDialog';
import ArticleIcon from '@mui/icons-material/Article';
@@ -220,85 +216,23 @@ export interface AppFCProps {

// Extract menu components into separate components to prevent full app re-renders
const TableMenu: React.FC = () => {
const [anchorEl, setAnchorEl] = useState<null | HTMLElement>(null);
const [openDialog, setOpenDialog] = useState<'database' | 'extract' | 'paste' | 'upload' | null>(null);
const fileInputRef = React.useRef<HTMLInputElement>(null);
const open = Boolean(anchorEl);

const handleOpenDialog = (dialog: 'database' | 'extract' | 'paste' | 'upload') => {
setAnchorEl(null);
if (dialog === 'upload') {
// For file upload, trigger the hidden file input
fileInputRef.current?.click();
} else {
setOpenDialog(dialog);
}
};
const [dialogOpen, setDialogOpen] = useState<boolean>(false);

return (
<>
<Button
variant="text"
onClick={(e) => setAnchorEl(e.currentTarget)}
endIcon={<KeyboardArrowDownIcon />}
aria-controls={open ? 'add-table-menu' : undefined}
aria-haspopup="true"
aria-expanded={open ? 'true' : undefined}
onClick={() => setDialogOpen(true)}
sx={{ textTransform: 'none' }}
>
Data
</Button>
<Menu
id="add-table-menu"
anchorEl={anchorEl}
open={open}
onClose={() => setAnchorEl(null)}
slotProps={{
paper: { sx: { py: '4px', px: '8px' } }
}}
aria-labelledby="add-table-button"
sx={{
'& .MuiMenuItem-root': { padding: '4px 8px' },
'& .MuiTypography-root': { fontSize: 14, display: 'flex', alignItems: 'center', textTransform: 'none', gap: 1 }
}}
>
<MenuItem onClick={() => handleOpenDialog('database')}>
<Typography fontSize="inherit">
connect to database <CloudQueueIcon fontSize="inherit" />
</Typography>
</MenuItem>
<MenuItem onClick={() => handleOpenDialog('extract')}>
<Typography fontSize="inherit">
extract data <span style={{fontSize: '11px'}}>(image/messy text)</span>
</Typography>
</MenuItem>
<MenuItem onClick={() => handleOpenDialog('paste')}>
<Typography>
paste data <span style={{fontSize: '11px'}}>(csv/tsv)</span>
</Typography>
</MenuItem>
<MenuItem onClick={() => handleOpenDialog('upload')}>
<Typography>
upload data file <span style={{fontSize: '11px'}}>(csv/tsv/json)</span>
</Typography>
</MenuItem>
</Menu>

{/* Dialogs rendered outside the Menu to avoid keyboard event issues */}
<DBTableSelectionDialog
open={openDialog === 'database'}
onClose={() => setOpenDialog(null)}
/>
<DataLoadingChatDialog
open={openDialog === 'extract'}
onClose={() => setOpenDialog(null)}
/>
<TableCopyDialogV2
open={openDialog === 'paste'}
onClose={() => setOpenDialog(null)}
/>
<TableUploadDialog
fileInputRef={fileInputRef}
{/* Unified Data Upload Dialog */}
<UnifiedDataUploadDialog
open={dialogOpen}
onClose={() => setDialogOpen(false)}
initialTab="menu"
/>
</>
);
23 changes: 23 additions & 0 deletions src/app/dfSlice.tsx
@@ -475,6 +475,29 @@ export const dataFormulatorSlice = createSlice({
let attachedMetadata = action.payload.attachedMetadata;
state.tables = state.tables.map(t => t.id == tableId ? {...t, attachedMetadata} : t);
},
updateTableRows: (state, action: PayloadAction<{tableId: string, rows: any[]}>) => {
// Update the rows of a table while preserving all other table properties
// This is used for refreshing data in original (non-derived) tables
let tableId = action.payload.tableId;
let newRows = action.payload.rows;

state.tables = state.tables.map(t => {
if (t.id == tableId) {
// Update metadata type inference based on new data
let newMetadata = { ...t.metadata };
for (let name of t.names) {
if (newRows.length > 0 && name in newRows[0]) {
newMetadata[name] = {
...newMetadata[name],
type: inferTypeFromValueArray(newRows.map(r => r[name])),
};
}
}
return { ...t, rows: newRows, metadata: newMetadata };
}
return t;
});
},
extendTableWithNewFields: (state, action: PayloadAction<{tableId: string, columnName: string, values: any[], previousName: string | undefined, parentIDs: string[]}>) => {
// extend the existing extTable with new columns from the new table
let newValues = action.payload.values;
3 changes: 3 additions & 0 deletions src/app/store.ts
@@ -26,3 +26,6 @@ let store = configureStore({
})

export default store;

// Export store instance for use in utilities
export { store };