Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 4 additions & 19 deletions packages/components/nodes/chains/ApiChain/postCore.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { BaseLanguageModel } from '@langchain/core/language_models/base'
import { CallbackManagerForChainRun } from '@langchain/core/callbacks/manager'
import { BaseChain, ChainInputs, LLMChain, SerializedAPIChain } from 'langchain/chains'
import { BaseLanguageModel } from '@langchain/core/language_models/base'
import { BasePromptTemplate, PromptTemplate } from '@langchain/core/prompts'
import { ChainValues } from '@langchain/core/utils/types'
import fetch from 'node-fetch'
import { BaseChain, ChainInputs, LLMChain, SerializedAPIChain } from 'langchain/chains'
import { secureFetch } from '../../../src'

export const API_URL_RAW_PROMPT_TEMPLATE = `You are given the below API Documentation:
{api_docs}
Expand Down Expand Up @@ -92,22 +92,7 @@ export class APIChain extends BaseChain implements APIChainInput {

const { url, data } = JSON.parse(api_url_body)

// Validate request is not to internal/private networks
const urlObj = new URL(url)
const hostname = urlObj.hostname

if (
hostname === 'localhost' ||
hostname === '127.0.0.1' ||
hostname.startsWith('192.168.') ||
hostname.startsWith('10.') ||
hostname.startsWith('172.16.') ||
hostname.includes('internal')
) {
throw new Error('Access to internal networks is not allowed')
}

const res = await fetch(url, {
const res = await secureFetch(url, {
method: 'POST',
headers: this.headers,
body: JSON.stringify(data)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { INode, INodeParams, INodeData, ICommonObject } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { Tool } from '@langchain/core/tools'
import fetch from 'node-fetch'
import * as cheerio from 'cheerio'
import { URL } from 'url'
import { xmlScrape } from '../../../src/utils'
import { secureFetch } from '../../../src'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses, xmlScrape } from '../../../src/utils'

interface ScrapedPageData {
url: string
Expand Down Expand Up @@ -60,7 +59,7 @@ class WebScraperRecursiveTool extends Tool {

private async scrapeSingleUrl(url: string): Promise<Omit<ScrapedPageData, 'url'> & { foundLinks: string[] }> {
try {
const response = await fetch(url, { timeout: this.timeoutMs, redirect: 'follow', follow: 5 })
const response = await secureFetch(url, { timeout: this.timeoutMs, redirect: 'follow', follow: 5 })
if (!response.ok) {
const errorText = await response.text()
return {
Expand Down
197 changes: 107 additions & 90 deletions packages/components/src/httpSecurity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import * as ipaddr from 'ipaddr.js'
import dns from 'dns/promises'
import axios, { AxiosRequestConfig, AxiosResponse } from 'axios'
import fetch, { RequestInit, Response } from 'node-fetch'
import http from 'http'
import https from 'https'

/**
* Checks if an IP address is in the deny list
Expand Down Expand Up @@ -62,96 +64,58 @@ export async function checkDenyList(url: string): Promise<void> {
*/
export async function secureAxiosRequest(config: AxiosRequestConfig, maxRedirects: number = 5): Promise<AxiosResponse> {
let currentUrl = config.url
let redirectCount = 0
let currentConfig = { ...config, maxRedirects: 0 } // Disable automatic redirects

// Validate the initial URL
if (currentUrl) {
await checkDenyList(currentUrl)
if (!currentUrl) {
throw new Error('secureAxiosRequest: url is required')
}

while (redirectCount <= maxRedirects) {
try {
// Update the URL in config for subsequent requests
currentConfig.url = currentUrl

const response = await axios(currentConfig)

// If it's a successful response (not a redirect), return it
if (response.status < 300 || response.status >= 400) {
return response
}

// Handle redirect
const location = response.headers.location
if (!location) {
// No location header, but it's a redirect status - return the response
return response
}

redirectCount++
let redirects = 0
let currentConfig = { ...config, maxRedirects: 0 } // Disable automatic redirects

if (redirectCount > maxRedirects) {
throw new Error('Too many redirects')
while (redirects <= maxRedirects) {
const target = await resolveAndValidate(currentUrl)
const agent = createPinnedAgent(target)

currentConfig = {
...currentConfig,
url: currentUrl,
...(target.protocol === 'http' ? { httpAgent: agent } : { httpsAgent: agent }),
headers: {
...currentConfig.headers,
Host: target.hostname
}
}

// Resolve the redirect URL (handle relative URLs)
const redirectUrl = new URL(location, currentUrl).toString()

// Validate the redirect URL against the deny list
await checkDenyList(redirectUrl)
const response = await axios(currentConfig)

// Update current URL for next iteration
currentUrl = redirectUrl
// If it's a successful response (not a redirect), return it
if (response.status < 300 || response.status >= 400) {
return response
}

// For redirects, we only need to preserve certain headers and change method if needed
if (response.status === 301 || response.status === 302 || response.status === 303) {
// For 303, or when redirecting POST requests, change to GET
if (
response.status === 303 ||
(currentConfig.method && ['POST', 'PUT', 'PATCH'].includes(currentConfig.method.toUpperCase()))
) {
currentConfig.method = 'GET'
delete currentConfig.data
}
}
} catch (error) {
// If it's not a redirect-related error from axios, propagate it
if (error.response && error.response.status >= 300 && error.response.status < 400) {
// This is a redirect response that axios couldn't handle automatically
// Continue with our manual redirect handling
const response = error.response
const location = response.headers.location

if (!location) {
return response
}
// Handle redirect
const location = response.headers.location
if (!location) {
// No location header, but it's a redirect status - return the response
return response
}

redirectCount++
redirects++
if (redirects > maxRedirects) {
throw new Error('Too many redirects')
}

if (redirectCount > maxRedirects) {
throw new Error('Too many redirects')
}
currentUrl = new URL(location, currentUrl).toString()

const redirectUrl = new URL(location, currentUrl).toString()
await checkDenyList(redirectUrl)
currentUrl = redirectUrl

// Handle method changes for redirects
if (response.status === 301 || response.status === 302 || response.status === 303) {
if (
response.status === 303 ||
(currentConfig.method && ['POST', 'PUT', 'PATCH'].includes(currentConfig.method.toUpperCase()))
) {
currentConfig.method = 'GET'
delete currentConfig.data
}
}
continue
// For redirects, we only need to preserve certain headers and change method if needed
if (response.status === 301 || response.status === 302 || response.status === 303) {
// For 303, or when redirecting POST requests, change to GET
if (
response.status === 303 ||
(currentConfig.method && ['POST', 'PUT', 'PATCH'].includes(currentConfig.method.toUpperCase()))
) {
currentConfig.method = 'GET'
delete currentConfig.data
}

// For other errors, re-throw
throw error
}
}

Expand All @@ -171,11 +135,11 @@ export async function secureFetch(url: string, init?: RequestInit, maxRedirects:
let redirectCount = 0
let currentInit = { ...init, redirect: 'manual' as const } // Disable automatic redirects

// Validate the initial URL
await checkDenyList(currentUrl)

while (redirectCount <= maxRedirects) {
const response = await fetch(currentUrl, currentInit)
const resolved = await resolveAndValidate(currentUrl)
const agent = createPinnedAgent(resolved)

const response = await fetch(currentUrl, { ...currentInit, agent: () => agent })

// If it's a successful response (not a redirect), return it
if (response.status < 300 || response.status >= 400) {
Expand All @@ -196,13 +160,7 @@ export async function secureFetch(url: string, init?: RequestInit, maxRedirects:
}

// Resolve the redirect URL (handle relative URLs)
const redirectUrl = new URL(location, currentUrl).toString()

// Validate the redirect URL against the deny list
await checkDenyList(redirectUrl)

// Update current URL for next iteration
currentUrl = redirectUrl
currentUrl = new URL(location, currentUrl).toString()

// Handle method changes for redirects according to HTTP specs
if (response.status === 301 || response.status === 302 || response.status === 303) {
Expand All @@ -219,3 +177,62 @@ export async function secureFetch(url: string, init?: RequestInit, maxRedirects:

throw new Error('Too many redirects')
}

type ResolvedTarget = {
hostname: string
ip: string
family: 4 | 6
protocol: 'http' | 'https'
}

async function resolveAndValidate(url: string): Promise<ResolvedTarget> {
const denyListString = process.env.HTTP_DENY_LIST
const denyList = denyListString ? denyListString.split(',').map((s) => s.trim()) : null

const u = new URL(url)
const hostname = u.hostname
const protocol: 'http' | 'https' = u.protocol === 'https:' ? 'https' : 'http'

if (ipaddr.isValid(hostname)) {
if (denyList) {
isDeniedIP(hostname, denyList)
}

return {
hostname,
ip: hostname,
family: hostname.includes(':') ? 6 : 4,
protocol
}
}

const records = await dns.lookup(hostname, { all: true })
if (records.length === 0) {
throw new Error(`DNS resolution failed for ${hostname}`)
}

if (denyList) {
for (const r of records) {
isDeniedIP(r.address, denyList)
}
}

const chosen = records.find((r) => r.family === 4) ?? records[0]

return {
hostname,
ip: chosen.address,
family: chosen.family as 4 | 6,
protocol
}
}

function createPinnedAgent(target: ResolvedTarget): http.Agent | https.Agent {
const Agent = target.protocol === 'https' ? https.Agent : http.Agent

return new Agent({
lookup: (_host, _opts, cb) => {
cb(null, target.ip, target.family)
}
})
}