数据流的中位数

Author Avatar
Tianqi Zhang 11月 07, 2019

Problem

中位数是有序列表中间的数。如果列表长度是偶数,中位数则是中间两个数的平均值。

例如,

[2,3,4] 的中位数是 3

[2,3] 的中位数是 (2 + 3) / 2 = 2.5

设计一个支持以下两种操作的数据结构:

void addNum(int num) // 从数据流中添加一个整数到数据结构中。
double findMedian() // 返回目前所有元素的中位数。

Solution-1 维护有序序列

可以直接维护一个单调的序列,链表或者数组都可以,两者实现方法不同但是时间复杂度一样,这里给出链表的实现

struct IncListNode {
    int val;
    IncListNode* next;
    IncListNode* prev;

    IncListNode(): val(0), next(nullptr), prev(nullptr) {}
    IncListNode(int value, IncListNode* nextNode=nullptr, IncListNode* prevNode=nullptr)
        : val(value), next(nextNode), prev(prevNode) {}
};

class IncreasingList {
private:
    IncListNode* head;
    IncListNode* tail;
    int numElem;
    IncListNode* medianPtr;

    bool even() {
        return (numElem % 2) == 0;
    }
public:
    IncreasingList(): numElem(0) {
        head = new IncListNode();
        tail = new IncListNode();
        head->next = tail;
        tail->prev = head;
        medianPtr = head;
    }

    ~IncreasingList() {
        IncListNode* ptr = head;
        while (ptr) {
            IncListNode* tempPtr = ptr;
            ptr = ptr->next;
            delete tempPtr;
        }
    }

    /*  1. even, not cross: do nothing
        2. even, cross: ptr = ptr->next
        3. odd, not cross: ptr = ptr->prev
        4. odd, cross: do nothing
    */
    void insert(int n) {
        IncListNode* newNode = new IncListNode(n);
        IncListNode* ptr = head->next;
        bool cross = false;

        while(ptr != tail) {
            if (n > ptr->val) {
                if (ptr == medianPtr) cross = true;
                ptr = ptr->next;
            }
            else if (n <= ptr->val) {
                ptr->prev->next = newNode;
                newNode->prev = ptr->prev;
                ptr->prev = newNode;
                newNode->next = ptr;
                break;
            }
        }

        if (ptr == tail) {
            cross = true;
            ptr = tail->prev;
            tail->prev = newNode;
            newNode->next = tail;
            ptr->next = newNode;
            newNode->prev = ptr;
        }

        if (even() && cross) {
            medianPtr = medianPtr->next;
        }
        else if (!even() && !cross) {
            medianPtr = medianPtr->prev;
        }
        numElem++;
    }

    double median() {
        if (even()) 
            return static_cast<double>(medianPtr->val + medianPtr->next->val) / 2;
        else
            return static_cast<double>(medianPtr->val);
    }

    void print() {
        IncListNode* ptr = head->next;
        cout << "head -> ";
        while(ptr != tail) {
            cout << ptr->val << " -> ";
            ptr = ptr->next;
        }
        cout << "tail;  medPtr: " << medianPtr->val << endl;
    }
};

class MedianFinder {
private:
    IncreasingList* list;
public:
    /** initialize your data structure here. */
    MedianFinder() {
        list = new IncreasingList();
    }

    ~MedianFinder() {
        delete list;
    }

    void addNum(int num) {
        list->insert(num);
        //list->print();
    }

    double findMedian() {
        return list->median();
    }
};

/**
 * Your MedianFinder object will be instantiated and called as such:
 * MedianFinder* obj = new MedianFinder();
 * obj->addNum(num);
 * double param_2 = obj->findMedian();
 */

Solution-2 维护两个平衡的堆

可以维护两个size平衡的堆,一个大顶堆记录有序序列的前半段,另一个小顶堆记录有序序列的后半段。如果元素个数为2k+1个的话,则大顶堆记录前k+1个,小顶堆记录后k个。如果是2k的话就均分。这样计算中位数的时候就可以以常数复杂度返回,并且添加元素的复杂度是对数级别

// code-1
class MedianFinder {
    priority_queue<int> lo;                              // max heap
    priority_queue<int, vector<int>, greater<int>> hi;   // min heap

public:
    // Adds a number into the data structure.
    void addNum(int num)
    {
        lo.push(num);                                    // Add to max heap

        hi.push(lo.top());                               // balancing step
        lo.pop();

        if (lo.size() < hi.size()) {                     // maintain size property
            lo.push(hi.top());
            hi.pop();
        }
    }

    // Returns the median of current data stream
    double findMedian()
    {
        return lo.size() > hi.size() 
            ? (double) lo.top() : (lo.top() + hi.top()) * 0.5;
    }
};

// code-2
class MedianFinder {
private:
    priority_queue<int> low;
    priority_queue<int, vector<int>, std::greater<int>> high;
public:    
    void addNum(int num) {
        low.push(num);

        if (low.size() > high.size() + 1) {
            high.push(low.top()); 
            low.pop();
        }

        if (!low.empty() && !high.empty()) {
            while (low.top() > high.top()) {
                int temp = low.top();
                low.pop();
                low.push(high.top());
                high.pop();
                high.push(temp);
            }
        }
    }

    double findMedian() {
        if (low.size() > high.size()) return static_cast<double>(low.top());
        else return static_cast<double>(low.top() + high.top()) / 2;
    }
};