import { FileType } from './datasets';

/**
 * Shared base for the per-file-type document interfaces declared in this file.
 * It only guarantees that a `fileType` tag is present.
 */
interface BaseContent {
    // Typed as a plain string here; the extending interfaces narrow it to a string literal.
    fileType: string;
}

// #region CSV Content types
/**
 * Document shape tagged with `fileType: 'Csv'`.
 *
 * `content` is a two-level array of {@link Content} values
 * (presumably rows, then cells within a row — TODO confirm with the producer).
 */
export interface CsvDocument extends BaseContent {
    fileType: 'Csv';
    content: Content[][];
    /** Optional list of header strings (presumably the column names — TODO confirm). */
    header?: string[];
    /** NOTE(review): presumably a Markdown rendering of the document — confirm. */
    markdown: string;
    /** Entities attached at document level, in addition to those on each `Content`. */
    entities: DetectTextEntitiesSingleModel[];
}

// #endregion

// #region XLSX Content types
/**
 * Document shape tagged with `fileType: 'Xlsx'`.
 *
 * `content` maps string keys to two-level arrays of {@link Content}
 * (presumably sheet name -> rows -> cells — TODO confirm with the producer).
 */
export interface XlsxDocument extends BaseContent {
    fileType: 'Xlsx';
    content: Record<string, Content[][]>;
    /** NOTE(review): presumably a Markdown rendering of the document — confirm. */
    markdown: string;
    /** Entities attached at document level, in addition to those on each `Content`. */
    entities: DetectTextEntitiesSingleModel[];
}
// #endregion

// #region RAW Content types
/**
 * Document shape tagged with `fileType: 'Raw'`.
 *
 * Unlike the tabular variants, the whole document is carried by a single {@link Content}.
 */
export interface RawDocument extends BaseContent {
    fileType: 'Raw';
    content: Content;
    /** NOTE(review): presumably a Markdown rendering of the document — confirm. */
    markdown: string;
    /** Entities attached at document level, in addition to those on `content`. */
    entities: DetectTextEntitiesSingleModel[];
}
// #endregion

// #region PDF Content types
/**
 * Document shape tagged with `fileType: 'Pdf'`.
 *
 * Carries a single top-level {@link Content}, a two-level array of
 * {@link PdfPageContent} items and one {@link PdfTable}.
 */
export interface PdfDocument extends BaseContent {
    fileType: 'Pdf';
    content: Content;
    /** Presumably one inner array per page, holding that page's items — TODO confirm. */
    pages: PdfPageContent[][];
    /** A single table grid; cells may be `null` (see `PdfTable`). */
    table: PdfTable;
    /** NOTE(review): presumably a Markdown rendering of the document — confirm. */
    markdown: string;
    /** Entities attached at document level, in addition to those on each `Content`. */
    entities: DetectTextEntitiesSingleModel[];
}

/** One typed item inside `PdfDocument.pages`: a `type` tag plus its {@link Content}. */
export interface PdfPageContent {
    // NOTE(review): typed as FileType (imported from './datasets'), whereas the parallel
    // OcrPageContent in this file uses OcrContentType — confirm FileType is intended here.
    type: FileType;
    content: Content;
}

/**
 * Two-dimensional grid of PDF table cells; a slot is `null` where no cell is present.
 * Declared as an (empty) interface extending `Array`, so it adds no members of its own.
 */
export interface PdfTable extends Array<Array<PdfTableCell | null>> {}

/** A single cell of a {@link PdfTable}: a `type` tag plus its {@link Content}. */
export interface PdfTableCell {
    // NOTE(review): free-form string here, whereas OcrTableCell in this file uses the
    // PdfTableCellType enum for the same field — confirm whether this should be narrowed.
    type: string;
    content: Content;
}
// #endregion

// #region DOCX Content types
/**
 * Document shape tagged with `fileType: 'Docx'`.
 *
 * Besides the main `content`, it carries foot/end notes as lists of {@link Content}
 * and header/footer content keyed by {@link DocXHeaderFooterTypeEnum}.
 */
export interface DocXDocument extends BaseContent {
    fileType: 'Docx';
    content: Content;
    footNotes: Content[];
    endNotes: Content[];
    /** `Record` over the enum, so an entry is required for every enum member. */
    header: Record<DocXHeaderFooterTypeEnum, Content>;
    /** `Record` over the enum, so an entry is required for every enum member. */
    footer: Record<DocXHeaderFooterTypeEnum, Content>;
    /** NOTE(review): presumably a Markdown rendering of the document — confirm. */
    markdown: string;
    /** Entities attached at document level, in addition to those on each `Content`. */
    entities: DetectTextEntitiesSingleModel[];
}

/**
 * Key type for `DocXDocument.header` and `DocXDocument.footer`.
 *
 * This is a numeric enum: the members evaluate to 0, 1 and 2, and the enum object
 * also carries the reverse (number -> name) mapping.
 * NOTE(review): the `Record` keys are therefore numbers, not the member names —
 * confirm this matches the keys used in the serialized payload.
 */
export enum DocXHeaderFooterTypeEnum {
    first = 0,
    even = 1,
    odd = 2,
}
// #endregion

/**
 * Basic content unit reused by every document shape in this file.
 * Only `entities` is required; `hash` and `text` may be absent.
 */
export interface Content {
    // Entities associated with this piece of content (presumably detected in `text` — TODO confirm).
    entities: DetectTextEntitiesSingleModel[];
    // NOTE(review): what is hashed is not visible from this file — confirm with the producer.
    hash?: string;
    text?: string;
}

/**
 * A single detected text entity.
 *
 * Two pairs of numeric bounds are carried: `start`/`end` and `pythonStart`/`pythonEnd`.
 * NOTE(review): presumably the same span expressed in two different string-offset
 * conventions — confirm units and whether `end` is exclusive.
 */
export type DetectTextEntitiesSingleModel = {
    start: number;
    end: number;
    pythonStart: number;
    pythonEnd: number;
    /** Entity category name. */
    label: string;
    /** Entity text (presumably the matched substring — TODO confirm). */
    text: string;
    /** NOTE(review): presumably a detection confidence; range not visible here. */
    score: number;
    /** Required key, but its value may be a string, `null` or `undefined`. */
    exampleRedaction: string | null | undefined;
};

/**
 * Tag for an `OcrPageContent` item. String enum: each member's runtime value
 * equals its name. Not exported; used only by types within this module.
 */
enum OcrContentType {
    PageNumber = 'PageNumber',
    PageHeader = 'PageHeader',
    PageFooter = 'PageFooter',
    SectionHeading = 'SectionHeading',
    Title = 'Title',
    Paragraph = 'Paragraph',
}

/**
 * Tag for an `OcrTableCell`. String enum: each member's runtime value equals its name.
 */
export enum PdfTableCellType {
    ColumnHeader = 'ColumnHeader',
    RowHeader = 'RowHeader',
    StubHead = 'StubHead',
    Description = 'Description',
    Content = 'Content',
}

/** One typed item inside `BaseDocument.pages`: an `OcrContentType` tag plus its `Content`. */
type OcrPageContent = {
    type: OcrContentType;
    content: Content;
};

/** A single cell of an `OcrTable`: a `PdfTableCellType` tag plus its `Content`. */
export type OcrTableCell = {
    type: PdfTableCellType;
    content: Content;
};

/** Two-dimensional grid of {@link OcrTableCell}; a slot is `null` where no cell is present. */
export type OcrTable = Array<Array<OcrTableCell | null>>;

/**
 * A key/value entry as listed in `BaseDocument.keyValuePairs`.
 * The key is a plain string while the value is a full `Content`.
 */
type OcrKeyValuePair = {
    // Identifier; may be either a string or a number.
    id: string | number;
    key: string;
    value: Content;
    // NOTE(review): presumably the offsets of this pair within the document text — confirm units.
    start: number;
    end: number;
};

/**
 * Table representation in which every field is optional.
 *
 * `data` is a two-level array of {@link Content} where individual slots may be `null`
 * (presumably rows, then cells — TODO confirm with the producer).
 */
export type TableContent = {
    tableName?: string;
    header?: string[];
    data?: Array<Array<Content | null>>;
};

/**
 * Document shape tagged with the imported `FileType` and a numeric `schemaVersion`.
 * Key/value pairs, pages and tables are all optional.
 */
export type BaseDocument = {
    fileType: FileType;
    content: Content;
    schemaVersion: number;
    keyValuePairs?: OcrKeyValuePair[];
    // Two-level array (presumably one inner array per page — TODO confirm).
    pages?: OcrPageContent[][];
    // Either an array of OcrTable grids or an array of TableContent objects;
    // the union is over the whole array, so the two forms are not mixed within one array.
    tables?: OcrTable[] | TableContent[];
};

/**
 * Union of the file-type-specific document interfaces.
 *
 * Each member declares a distinct string-literal `fileType`
 * ('Csv' | 'Xlsx' | 'Raw' | 'Pdf' | 'Docx'), so that field can be used to narrow the union.
 */
export type AnyDocument =
    | CsvDocument
    | XlsxDocument
    | RawDocument
    | PdfDocument
    | DocXDocument;
