Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1a6c978
feat: add filename search to content and OCR search
Johnson-zs May 13, 2026
071ae33
feat: add natural language semantic search
Johnson-zs May 14, 2026
b48ad3e
feat: add relative time support for Chinese search
Johnson-zs May 14, 2026
4cbf3b0
feat: implement file size range filtering
Johnson-zs May 14, 2026
fc6216d
feat: add file size constraint support in semantic search
Johnson-zs May 14, 2026
5ab5ca2
feat: add action-based time field search support
Johnson-zs May 14, 2026
84f5aef
fix: automatically handle hidden path search conditions
Johnson-zs May 14, 2026
79a886d
feat: add location-based search support for Chinese NLP
Johnson-zs May 14, 2026
9c6f7b8
feat: add semantic query detection and multi-path search support
Johnson-zs May 14, 2026
ecc50e1
feat: add file size range filter to search strategies
Johnson-zs May 15, 2026
868ba70
fix: unify dfm-search library and path names
Johnson-zs May 15, 2026
2cfb394
feat: add file metadata attributes to search results
Johnson-zs May 15, 2026
caa556d
fix: improve Chinese NLP search functionality
Johnson-zs May 15, 2026
54bfe85
feat: add semantic search with detailed results
Johnson-zs May 15, 2026
648f64c
feat: enhance semantic search with explicit directories
Johnson-zs May 18, 2026
64dc847
feat: add max results limit for semantic search
Johnson-zs May 18, 2026
eb1079f
test: add search target control tests
Johnson-zs May 18, 2026
8a11f48
feat: add chinese NLP parsing for relative time and size constraints
Johnson-zs May 18, 2026
73050be
feat: add NGram analyzer and tokenizer for Lucene++
Johnson-zs May 18, 2026
bd89e8c
fix: improve content search engine validation and analyzer
Johnson-zs May 18, 2026
b7d77d9
refactor: optimize search filtering and query building
Johnson-zs May 19, 2026
745b093
feat: add on-demand content highlight retrieval
Johnson-zs May 19, 2026
44356d2
refactor: improve NGramTokenizer and search factory
Johnson-zs May 21, 2026
2ebb5df
refactor: improve OCR text search validation and analyzer selection
Johnson-zs May 21, 2026
4501c55
perf: optimize search performance with field selector
Johnson-zs May 22, 2026
4bc9565
refactor: disable unit tests in release builds
Johnson-zs May 22, 2026
6b3cb74
feat: optimize ngram search query building
Johnson-zs May 23, 2026
9c477b9
refactor: remove NGram analyzer and tokenizer components
Johnson-zs May 24, 2026
e789187
fix: adjust N-gram token position calculation
Johnson-zs May 24, 2026
455fb21
feat: enhance ContentRetriever with content fetching capabilities
Johnson-zs May 24, 2026
3888210
test: add test utility libraries for content search
Johnson-zs May 24, 2026
a9c2a9e
perf: optimize OCR text search document loading
Johnson-zs May 25, 2026
06a530d
perf: replace chinese analyzer with ngram search
Johnson-zs May 25, 2026
3681996
test: add filename search engine test cases
Johnson-zs May 25, 2026
5b9b379
docs: update license files and cleanup
Johnson-zs May 26, 2026
fad1858
chore: bump version to 1.3.57
Johnson-zs May 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ AGENTS.md
.trellis
.claude
.agents
.codex

18 changes: 14 additions & 4 deletions .reuse/dep5
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,18 @@ Files: src/dfm-burn/3rdparty/udfclient/*
Copyright: Reinoud Zandijk <reinoud@netbsd.org>
License: ClArtistic

# fulltext
Files: src/dfm-search/3rdparty/fulltext/*
Copyright: 2009-2014 Alan Wright
License: LGPL-3.0-or-later
# cpp-stub (MIT)
Files: 3rdparty/testutils/cpp-stub/stub.h 3rdparty/testutils/cpp-stub/addr_any.h 3rdparty/testutils/cpp-stub/addr_pri.h
Copyright: jobczz
License: MIT

# ELFIO (MIT)
Files: 3rdparty/testutils/cpp-stub/elfio.hpp
Copyright: Sergei Tikhomirov
License: MIT

# semantic rules
Files: src/dfm-search/dfm-search-lib/semantic/rules/zh_CN/*.json
Copyright: 2026 UnionTech Software Technology Co., Ltd.
License: GPL-3.0-or-later

File renamed without changes.
280 changes: 280 additions & 0 deletions 3rdparty/testutils/cpp-stub/addr_any.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
#ifndef __ADDR_ANY_H__
#define __ADDR_ANY_H__


//linux
#include <regex.h>
#include <cxxabi.h>
//c
#include <cctype>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>

//c++
#include <string>
#include <map>
//project
#include "elfio.hpp"



// Looks up function addresses by (POSIX basic) regular expression.
//
// The target ELF file is either the first file-backed mapping with offset 0
// listed in /proc/self/maps (default constructor) or the first readable,
// private, offset-0 mapping whose pathname matches a caller-supplied regex
// (library constructor). Symbols are read from that file with ELFIO and the
// mapping's base address is added to each symbol value.
class AddrAny
{
public:
    // Targets the running executable; the base address is fixed at 0.
    AddrAny()
        : m_init(false), m_baseaddr(0)
    {
        m_init = get_exe_pathname(m_fullname);
    }

    // Targets the first mapped file whose pathname matches the regex
    // `libname`. m_baseaddr stays 0 when no mapping matches, in which case
    // every lookup returns -1 because m_init is false.
    AddrAny(std::string libname)
        : m_init(false), m_baseaddr(0)
    {
        m_init = get_lib_pathname_and_baseaddr(libname, m_fullname, m_baseaddr);
    }

    // Each lookup below fills `result` with (symbol name -> address) pairs for
    // function symbols whose demangled name (or raw name when demangling
    // fails) matches `func_name_regex_str`, and returns the number of matches
    // or -1 on failure. They differ only in symbol table and symbol binding.

    // Local-binding functions from the static symbol table (.symtab).
    int get_local_func_addr_symtab(std::string func_name_regex_str, std::map<std::string,void*>& result)
    {
        return get_func_addr(SHT_SYMTAB, STB_LOCAL, func_name_regex_str, result);
    }
    // Global-binding functions from the static symbol table (.symtab).
    int get_global_func_addr_symtab(std::string func_name_regex_str, std::map<std::string,void*>& result)
    {
        return get_func_addr(SHT_SYMTAB, STB_GLOBAL, func_name_regex_str, result);
    }
    // Weak-binding functions from the static symbol table (.symtab).
    int get_weak_func_addr_symtab(std::string func_name_regex_str, std::map<std::string,void*>& result)
    {
        return get_func_addr(SHT_SYMTAB, STB_WEAK, func_name_regex_str, result);
    }

    // Global-binding functions from the dynamic symbol table (.dynsym).
    int get_global_func_addr_dynsym( std::string func_name_regex_str, std::map<std::string,void*>& result)
    {
        return get_func_addr(SHT_DYNSYM, STB_GLOBAL, func_name_regex_str, result);
    }
    // Weak-binding functions from the dynamic symbol table (.dynsym).
    int get_weak_func_addr_dynsym(std::string func_name_regex_str, std::map<std::string,void*>& result)
    {
        return get_func_addr(SHT_DYNSYM, STB_WEAK, func_name_regex_str, result);
    }

private:
    // Demangles the C++ symbol `s` into `name`.
    // Returns true on success; on failure returns false and stores a short
    // description of the error in `name` instead.
    bool demangle(std::string& s, std::string& name) {
        int status = 0;
        char* pname = abi::__cxa_demangle(s.c_str(), 0, 0, &status);
        if (status != 0 || NULL == pname)
        {
            switch(status)
            {
                case -1: name = "memory allocation error"; break;
                case -2: name = "invalid name given"; break;
                case -3: name = "internal error: __cxa_demangle: invalid argument"; break;
                default: name = "unknown error occured"; break;
            }
            free(pname); // free(NULL) is a no-op
            return false;
        }
        name = pname;
        free(pname);
        return true;
    }

    // Parses one line of /proc/self/maps held in `line` (capacity `line_cap`).
    //
    // Returns a pointer into `line` at the NUL-terminated pathname (trailing
    // newline stripped) when the line describes an offset-0 mapping with a
    // non-empty pathname that does not start with '[' ; returns NULL
    // otherwise. `base_addr` and `perm` receive the mapping start address and
    // permission string whenever the numeric fields were scanned.
    static char* parse_maps_line(char* line, size_t line_cap, uintptr_t& base_addr, char (&perm)[5])
    {
        unsigned long offset = 0;
        // %n is only written when scanning reaches it; suppressed conversions
        // do not count towards the return value, so detect that case explicitly.
        int pathname_pos = -1;

        if(sscanf(line, "%" PRIxPTR "-%*lx %4s %lx %*x:%*x %*d%n", &base_addr, perm, &offset, &pathname_pos) != 3) return NULL;
        if(pathname_pos < 0) return NULL;

        // The ELF header can only sit at the start of a mapping whose file
        // offset is 0.
        if(0 != offset) return NULL;

        // Skip the padding between the inode column and the pathname.
        const int last_index = static_cast<int>(line_cap - 1);
        while(pathname_pos < last_index && isspace(static_cast<unsigned char>(line[pathname_pos])))
            pathname_pos += 1;
        if(pathname_pos >= last_index) return NULL;

        char* pathname = line + pathname_pos;
        size_t pathname_len = strlen(pathname);
        if(0 == pathname_len) return NULL;
        if(pathname[pathname_len - 1] == '\n')
        {
            pathname[pathname_len - 1] = '\0';
            pathname_len -= 1;
        }
        if(0 == pathname_len) return NULL;
        if('[' == pathname[0]) return NULL; // pseudo mappings such as [heap], [stack]

        return pathname;
    }

    // Stores in `name` the pathname of the first file-backed, offset-0 mapping
    // listed in /proc/self/maps. Returns false if the file cannot be opened or
    // no such mapping exists.
    bool get_exe_pathname( std::string& name)
    {
        FILE* fp = fopen("/proc/self/maps", "r");
        if(NULL == fp)
        {
            return false;
        }

        char line[512];
        bool found = false;
        while(fgets(line, sizeof(line), fp))
        {
            uintptr_t base_addr = 0;
            char perm[5] = {0};
            const char* pathname = parse_maps_line(line, sizeof(line), base_addr, perm);
            if(NULL == pathname) continue;

            name = pathname;
            found = true;
            break;
        }
        fclose(fp);

        return found;
    }

    // Finds the first readable, private, offset-0 mapping in /proc/self/maps
    // whose pathname matches the basic regex `pathname_regex_str`.
    // On success stores its pathname in `name` and its start address in `addr`
    // and returns true. Returns false (leaving `name`/`addr` untouched) when
    // the regex does not compile, the maps file cannot be opened, or nothing
    // matches.
    bool get_lib_pathname_and_baseaddr(std::string pathname_regex_str, std::string& name, unsigned long& addr)
    {
        regex_t pathname_regex;
        if(0 != regcomp(&pathname_regex, pathname_regex_str.c_str(), 0))
        {
            return false;
        }

        FILE* fp = fopen("/proc/self/maps", "r");
        if(NULL == fp)
        {
            regfree(&pathname_regex);
            return false;
        }

        char line[512];
        bool found = false;
        while(fgets(line, sizeof(line), fp))
        {
            uintptr_t base_addr = 0;
            char perm[5] = {0};
            const char* pathname = parse_maps_line(line, sizeof(line), base_addr, perm);
            if(NULL == pathname) continue;

            //check permission
            if(perm[0] != 'r') continue;
            if(perm[3] != 'p') continue; //do not touch the shared memory

            //check pathname: is this the ELF we were asked for?
            if(0 == regexec(&pathname_regex, pathname, 0, NULL, 0))
            {
                name = pathname;
                addr = static_cast<unsigned long>(base_addr);
                found = true;
                break;
            }
        }
        fclose(fp);
        regfree(&pathname_regex);

        return found;
    }

    // Collects function symbols of binding `stype` from the first section of
    // type `ttype` in the target ELF file.
    //
    // A symbol is reported under its demangled name when demangling succeeds
    // and under its raw name otherwise; that same name is what the basic regex
    // `func_name_regex_str` is matched against. The stored address is the
    // symbol value plus m_baseaddr. Names already present in `result` are not
    // overwritten but are still counted.
    //
    // Returns the number of matching symbols, or -1 when the object was not
    // initialised, the ELF file cannot be loaded, or the regex does not
    // compile.
    int get_func_addr(unsigned int ttype, unsigned int stype, std::string& func_name_regex_str, std::map<std::string,void*>& result)
    {
        if(!m_init)
        {
            return -1;
        }

        // Load ELF data
        ELFIO::elfio reader;
        if(!reader.load(m_fullname.c_str()))
        {
            return -1;
        }

        regex_t func_name_regex;
        if(0 != regcomp(&func_name_regex, func_name_regex_str.c_str(), 0))
        {
            return -1;
        }

        int count = 0;
        ELFIO::Elf_Half sec_num = reader.sections.size();
        for(int i = 0; i < sec_num; ++i)
        {
            ELFIO::section* psec = reader.sections[i];
            // Check section type
            if(psec->get_type() != ttype) continue;

            const ELFIO::symbol_section_accessor symbols( reader, psec );
            for ( unsigned int j = 0; j < symbols.get_symbols_num(); ++j )
            {
                std::string name;
                std::string name_demangled;
                ELFIO::Elf64_Addr value;
                ELFIO::Elf_Xword size;
                unsigned char bind;
                unsigned char type;
                ELFIO::Elf_Half section_index;
                unsigned char other;

                // Read symbol properties
                symbols.get_symbol( j, name, value, size, bind, type, section_index, other );
                if(type != STT_FUNC || bind != stype) continue;

                // Fall back to the raw symbol name (e.g. C functions) when it
                // is not a mangled C++ name.
                const std::string& key = demangle(name, name_demangled) ? name_demangled : name;
                if (0 == regexec(&func_name_regex, key.c_str(), 0, NULL, 0))
                {
                    void* address = reinterpret_cast<void*>(static_cast<uintptr_t>(value + m_baseaddr));
                    result.insert ( std::pair<std::string,void *>(key, address));
                    count++;
                }
            }
            // Only the first section of the requested type is scanned.
            break;
        }

        regfree(&func_name_regex);
        return count;
    }
private:
    bool m_init;                // true once the target ELF pathname was resolved
    std::string m_name;
    std::string m_fullname;     // pathname of the target ELF file
    unsigned long m_baseaddr;   // added to every symbol value; 0 for the executable

};
#endif
Loading
Loading