Hi, this is the pulse train producer I used in my master's thesis. I needed to solve a variable-modulo problem in VHDL for it; introducing an estimator allowed me to do that.

Pulse train producer

--DESCRIPTION
--This entity latches three parameters once it detects a rising edge on edge_in;
--afterwards, it produces a pulse train in accordance with these parameters.
--The ratio between the high-speed clock and the low-speed clock is 8.
--Tested with an 800 MHz high-speed clock.



-- DELAY:
-- cycles before it starts producing the pulse train, 0 to 2**8-1
-- As this entity will most likely run with a faster clock than the rest, I added
-- this parameter so you can sync things up between the state machine's clock cycles.
-- e.g.:
-- ___|^|_|^|_|^|____ (ch1) 
--________|^|________ (ch2)
-- cannot be done without the delay parameter, as |^| has a length of about 1.25 ns (800 MHz clock) while the minimal reaction time
-- of the state machine is 100 ns (running at 10 MHz right now)

-- LENGTH:
-- width of the pulses to be produced, 1-2**8
-- eg:
-- ___|^^|____ (ch1) -> length = 2
-- ___|^|_____ (ch2) -> length = 1

-- COUNT:
-- number of pulses to be produced, 1 to 2**8
-- This is needed if you want to have a pulse train.
-- Pulses will always be symmetric, i.e. the high and low phases are equally long (so _|^|_|^|_ or _|^^^|___|^^^|___ but never _|^|___|^|___)

-- Reminder:
-- all values must stay within their ranges, either 0 to 2**8-1 or 1 to 2**8

LIBRARY IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;
LIBRARY UNISIM;
USE UNISIM.VCOMPONENTS.ALL;

-- Port interface of the pulse train generator.
-- The three integer parameters are sampled when a rising edge on edge_in is
-- seen while no pulse train is in progress.
entity pulser_generic is
    port (
        -- clocking
        clk_in            : in  std_logic;                   -- low-speed clock: drives the control process and the OSERDESE2 CLKDIV pin
        clk_high_speed_in : in  std_logic;                   -- high-speed clock: drives the OSERDESE2 CLK pin
        -- control
        edge_in           : in  std_logic;                   -- a 0 -> 1 transition starts a new pulse train
        rst_in            : in  std_logic;                   -- stops the generator and resets the OSERDESE2
        -- pulse train parameters
        length_in         : in  integer range 1 to 2**8;     -- width of each pulse
        count_in          : in  integer range 1 to 2**8;     -- number of pulses
        delay_in          : in  integer range 0 to 2**8 - 1; -- cycles before the first pulse
        -- outputs
        pulse_out         : out std_logic;                   -- serialized pulse train (OSERDESE2 OQ)
        finished_out      : out std_logic                    -- high whenever no pulse train is running
    );
end pulser_generic;

ARCHITECTURE main OF pulser_generic IS
    -- Previous sample of edge_in, used to detect a rising edge.
    SIGNAL edge_in_old: STD_LOGIC;
    -- The 8 bits handed to the OSERDESE2 for one clk_in period; bit i feeds input D(i+1).
    SIGNAL serdes_input: STD_LOGIC_VECTOR(7 DOWNTO 0);

    -- Coarse, lagging estimate of the index of the pulse period that the current
    -- chunk falls into. It replaces a division/modulo by the run-time period 2*length.
    -- The range has a little headroom because the estimate may be bumped once more
    -- in the very last chunk of a maximum-size pulse train.
    SIGNAL estimator: INTEGER RANGE 0 TO 2**8 + 4;
    -- Step applied to the estimator (number of whole periods per 8-bit chunk for short pulses).
    SIGNAL estimator_increase: INTEGER RANGE 1 TO 4;
    -- Index of the 8-bit chunk being produced. Starts at -(delay / 8) so the whole-chunk
    -- part of the delay is spent at negative indices. The upper bound covers
    -- length = count = 2**8 with a non-zero fine delay.
    SIGNAL chunk_counter: INTEGER RANGE -2**5 TO 2**14 + 1;

    -- Parameters latched at the start of a pulse train.
    SIGNAL length_act: INTEGER RANGE 1 TO 2**8;
    SIGNAL count_act: INTEGER RANGE 1 TO 2**8;
    -- Fine part of the delay (bit offset inside a chunk).
    SIGNAL delay_act: INTEGER RANGE 0 TO 7;

    -- '1' while a pulse train is being produced.
    SIGNAL running: STD_LOGIC;

BEGIN

-- 8:1 output serializer: shifts serdes_input out on pulse_out, clocked by
-- clk_high_speed_in (CLK) and loaded in the clk_in domain (CLKDIV).
OSERDES2_inst: OSERDESE2
    GENERIC MAP(
        TBYTE_CTL => "FALSE",
        TBYTE_SRC => "FALSE",
        DATA_RATE_OQ => "SDR",
        DATA_RATE_TQ => "SDR",
        DATA_WIDTH => 8,
        INIT_OQ => '0',
        INIT_TQ => '0',
        SERDES_MODE => "MASTER",
        SRVAL_OQ => '0',
        SRVAL_TQ => '0', 
        TRISTATE_WIDTH => 1
    )
    PORT MAP(
        OFB => open,
        OQ => pulse_out,
        SHIFTOUT1 => open,
        SHIFTOUT2 => open,
        TBYTEOUT => open,
        TFB => open,
        TQ => open,
        CLK => clk_high_speed_in,
        CLKDIV => clk_in,
        D1 => serdes_input(0),
        D2 => serdes_input(1),
        D3 => serdes_input(2),
        D4 => serdes_input(3),
        D5 => serdes_input(4),
        D6 => serdes_input(5),
        D7 => serdes_input(6),
        D8 => serdes_input(7),
        OCE => '1',
        RST => rst_in,
        SHIFTIN1 => '0',
        SHIFTIN2 => '0',
        T1 => '0',
        T2 => '0',
        T3 => '0',
        T4 => '0',
        TBYTEIN => '0',
        TCE => '0'
    );

finished_out <= NOT running;

-- Control process (clk_in domain).
-- Bit positions are counted relative to the first chunk that can contain a pulse:
--   position p = 8 * chunk_counter + i,   i = 0..7
-- Pulse k (k = 0 .. count_act-1) is high for
--   delay_act + 2*k*length_act <= p < delay_act + (2*k+1)*length_act
-- The whole train ends at total_len = 2*length_act*count_act + delay_act.
PROCESS(clk_in)
    VARIABLE total_len : INTEGER RANGE 0 TO 2**18 - 1;       -- first position after the pulse train
    VARIABLE bit_pos   : INTEGER RANGE -2**10 TO 2**18 - 1;  -- position of serdes bit i
    VARIABLE win_start : INTEGER RANGE -2**10 TO 2**18 - 1;  -- start of a candidate pulse window
    VARIABLE in_pulse  : BOOLEAN;
BEGIN
    IF RISING_EDGE(clk_in) THEN
        edge_in_old <= edge_in;
        -- default: output low; individual bits are overridden below
        serdes_input <= (OTHERS => '0');
        total_len := 2 * length_act * count_act + delay_act;

        IF rst_in = '1' THEN
            running <= '0';
        ELSIF running = '1' AND chunk_counter * 8 >= total_len THEN
            -- every position of the pulse train has been emitted
            running <= '0';
        ELSIF edge_in_old = '0' AND edge_in = '1' AND running = '0' THEN
            -- rising edge while idle: latch parameters and start a new train
            running <= '1';
            length_act <= length_in;
            count_act <= count_in;
            -- Split the delay into a fine part (bit offset within a chunk) and a
            -- coarse part (whole chunks). The coarse part must be derived from
            -- delay_in: delay_act still holds the value of the previous run here,
            -- and being 0..7 it would always yield 0 and drop the coarse delay.
            delay_act <= delay_in MOD 8;
            chunk_counter <= -(delay_in / 8);
            estimator <= 0;
            CASE length_in IS
                WHEN  1  =>  estimator_increase <= 4;
                WHEN  2|3  =>  estimator_increase <= 2;
                WHEN OTHERS =>  estimator_increase <= 1;
            END CASE;
        ELSIF running = '1' THEN
            -- Only advance the counters while a train is active; counting while
            -- idle would eventually leave the declared integer ranges.
            IF estimator * 2 * length_act + delay_act < 8 * chunk_counter THEN
                estimator <= estimator + estimator_increase; 
            END IF;
            chunk_counter <= chunk_counter + 1;

            FOR i IN 0 TO 7 LOOP
                bit_pos := chunk_counter * 8 + i;
                in_pulse := FALSE;
                -- The estimator is registered and therefore trails the current chunk,
                -- and it advances in coarse steps, so a set of neighbouring periods is
                -- tested instead of exactly one:
                --   * offset -1 is needed when the estimate has already stepped past
                --     the period the chunk starts in (length_act >= 3);
                --   * offsets up to +7 are needed for length_act = 1, where the
                --     estimate is a full chunk (4 periods) behind and a chunk holds
                --     4 further periods.
                -- Every window tested is a genuine pulse window, so testing more of
                -- them can never create a pulse where none belongs; the limits of the
                -- train are enforced separately below.
                FOR j IN -1 TO 7 LOOP
                    win_start := (2 * estimator + 2 * j) * length_act + delay_act;
                    IF win_start <= bit_pos AND bit_pos < win_start + length_act THEN
                        in_pulse := TRUE;
                    END IF;
                END LOOP;
                -- bit_pos >= delay_act rejects the window of the non-existent period -1
                -- (and keeps the output low during the coarse-delay chunks);
                -- bit_pos < total_len rejects periods beyond count_act.
                IF in_pulse AND bit_pos >= delay_act AND bit_pos < total_len THEN
                    serdes_input(i) <= '1';
                END IF;
            END LOOP;
        END IF;
    END IF;
END PROCESS;

END main;